GIT dac2b3d34a495a462603b32bbcbbf569d940f6ac git+ssh://master.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.19.git

commit dac2b3d34a495a462603b32bbcbbf569d940f6ac
Author: Thomas Graf <tgraf@suug.ch>
Date:   Thu Aug 17 18:15:44 2006 -0700

    [IPv4]: Convert route get to new netlink api
    
    Fixes various unvalidated netlink attributes causing memory
    corruptions when left empty by userspace applications.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 1e7bb433ae05140ede5d7e5c5f700a25cc51dcdc
Author: Thomas Graf <tgraf@suug.ch>
Date:   Thu Aug 17 18:15:17 2006 -0700

    [IPv4]: Convert FIB dumping to use new netlink api
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit fa37368149873dd06b32cbeb82f27ba3ec7e0f9c
Author: Thomas Graf <tgraf@suug.ch>
Date:   Thu Aug 17 18:14:52 2006 -0700

    [IPv4]: FIB configuration using struct fib_config
    
    Introduces struct fib_config replacing the ugly struct kern_rta
    prone to ordering issues. Avoids creating faked netlink messages
    for auto generated routes or requests via ioctl.
    
    A new interface net/nexthop.h is added to help navigate through
    nexthop configuration arrays.
    
    A new struct nl_info will be used to carry the necessary netlink
    information to be used for notifications later on.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit c179a842dfe401f1ea050387022bd17bca509111
Author: Brian Haley <brian.haley@hp.com>
Date:   Tue Aug 15 01:37:57 2006 -0700

    [NET/IPV4/IPV6]: Change some sysctl variables to __read_mostly
    
    Change net/core, ipv4 and ipv6 sysctl variables to __read_mostly.
    
    Couldn't actually measure any performance increase while testing (.3%
    I consider noise), but seems like the right thing to do.
    
    Signed-off-by: Brian Haley <brian.haley@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 3aed3b005650efc3a5882eb4d0aea069a1d00987
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:37:29 2006 -0700

    [RTNETLINK]: Unexport rtnl socket
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 08635dcf01c997e87bfb045c258ee68cf68d65b0
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:37:09 2006 -0700

    [NET] link: Convert notifications to use rtnl_notify()
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 95a0d1f74fd2f69386ea438ef660cd507ac8451d
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:36:49 2006 -0700

    [WIRELESS]: Convert notifications to use rtnl_notify()
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 2a0eaf3d1d5947d112dfd9133cf063fef2258891
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:36:28 2006 -0700

    [BRIDGE]: Convert notifications to use rtnl_notify()
    
    Fixes a wrong use of current->pid as netlink pid.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 830a06b0a1ec704755abfb3d83e01d9ae69003c4
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:36:07 2006 -0700

    [IPv6] prefix: Convert prefix notifications to use rtnl_notify()
    
    Fixes a wrong use of current->pid as netlink pid.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 34689971f9463a36d5ea71c5e7392b8e99ca197a
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:35:47 2006 -0700

    [IPv6] link: Convert link notifications to use rtnl_notify()
    
    Fixes a wrong use of current->pid as netlink pid.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 42fb056519beff44cd7f7840d414d68eef965e7a
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:35:24 2006 -0700

    [IPv6] route: Convert route notifications to use rtnl_notify()
    
    Fixes a wrong use of current->pid as netlink pid.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 626d70d1f84c5185f07b3897aee944c3238eb8c7
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:35:02 2006 -0700

    [IPv6] address: Convert address notification to use rtnl_notify()
    
    Fixes a wrong use of current->pid as netlink pid.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 172665aa39278e37c28be25586e9e2f19fd54258
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:34:17 2006 -0700

    [IPv4] route: Convert route notifications to use rtnl_notify()
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 89f6a4838349b30c69c9760e5319cef3258ede60
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:33:59 2006 -0700

    [IPv4] address: Convert address notification to use rtnl_notify()
    
    Adds support for NLM_F_ECHO allowing applications to easly
    see which address have been deleted, added, or promoted.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 674091275c81a4674f9affef7dc1d68d842c0fb5
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:33:35 2006 -0700

    [DECNET]: Convert DECnet notifications to use rtnl_notify()
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 3003b029d8d5ac2d25ba161a9a847c172feead2b
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:33:14 2006 -0700

    [NEIGH]: Convert neighbour notifications ot use rtnl_notify()
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 35142cbb11145b02609de63b872d72b6f169b7dd
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:32:48 2006 -0700

    [NET] fib_rules: Convert fib rule notification to use rtnl_notify()
    
    Adds support for NLM_F_ECHO to simplify the process of identifying
    inserted rules with an auto generated priority.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit e1a32d505cc695f9cc2a73d69047b07d406bf931
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:31:41 2006 -0700

    [RTNETLINK]: Add rtnetlink notification interface
    
    Adds rtnl_notify() to send rtnetlink notification messages and
    rtnl_set_sk_err() to report notification errors as socket
    errors in order to indicate the need of a resync due to loss
    of events.
    
    nlmsg_report() is added to properly document the meaning of
    NLM_F_ECHO.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 3a25fd03caf8981d24cfd2aa3ab0d1610e049c00
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:31:06 2006 -0700

    [NETLINK]: Add notification message sending interface
    
    Adds nlmsg_notify() implementing proper notification logic. The
    message is multicasted to all listeners in the group. The
    applications the requests orignates from can request a unicast
    back report in which case said socket will be excluded from the
    multicast to avoid duplicated notifications.
    
    nlmsg_multicast() is extended to take allocation flags to
    allow notification in atomic contexts.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit f93d35beed509176364f19cb94fe819c629329f8
Author: Thomas Graf <tgraf@suug.ch>
Date:   Tue Aug 15 00:30:25 2006 -0700

    [RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit b01e4e7e211a64e53cad5848b10f1c91428f9360
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Tue Aug 15 00:15:41 2006 -0700

    [IPV6] udp: Fix type in previous change.
    
    UDPv6 stats are UDP6_foo not UDP_foo.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 73506a03b70ff3cd6db89b141a67e64833c6aea8
Author: Stphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Date:   Tue Aug 15 00:09:17 2006 -0700

    [ARCNET]: SoHard PCI support
    
    From: Stphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
    
    Add support for a SoHard PCI ARCnet card.
    
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit b8003637e837d36f4d0c69b88a265ba3496ff062
Author: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date:   Tue Aug 15 00:05:38 2006 -0700

    [IRDA]: Replace hard-coded dev_self[] array sizes with ARRAY_SIZE()
    
    Several IR drivers used "for (i = 0; i < 4; i++)" to walk their
    dev_self[] table.  Better to use ARRAY_SIZE().  And fix ali-ircc so it
    won't run off the end if we find too many adapters.
    
    Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit d9aa45c6ab06f8051406fda4b7b48825723c1c6b
Author: Adrian Bunk <bunk@stusta.de>
Date:   Tue Aug 15 00:03:53 2006 -0700

    [SELINUX]: security/selinux/hooks.c: Make 4 functions static.
    
    This patch makes four needlessly global functions static.
    
    Signed-off-by: Adrian Bunk <bunk@stusta.de>
    Acked-by: James Morris <jmorris@namei.org>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 5b4e3cf108e96b1b61b439d84baa2e1ac76086af
Author: Alan Cox <alan@redhat.com>
Date:   Tue Aug 15 00:01:05 2006 -0700

    [NETFILTER]: Make unused signal code go away so nobody copies its brokenness
    
    This code is wrong on so many levels, please lose it so it isn't
    replicated anywhere else.
    
    Signed-off-by: Alan Cox <alan@redhat.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit aaeb560affae9acd571b83e5ff930341f7ea977c
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Tue Aug 15 00:00:09 2006 -0700

    [IPV6]: Add UDP_MIB_{SND,RCV}BUFERRORS handling.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 338820d95f73b9c8bd6e9a6c974b46305c04fa46
Author: Martin Bligh <mbligh@google.com>
Date:   Mon Aug 14 23:57:10 2006 -0700

    [IPV4]: add the UdpSndbufErrors and UdpRcvbufErrors MIBs
    
    Signed-off-by: Martin Bligh <mbligh@google.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>

commit 646bd1dcf495d132f7a245ed4fa7b4cdd79d57df
Author: Adrian Bunk <bunk@stusta.de>
Date:   Mon Aug 14 23:55:20 2006 -0700

    [DECNET]: cleanups
    
    - make the following needlessly global functions static:
      - dn_fib.c: dn_fib_sync_down()
      - dn_fib.c: dn_fib_sync_up()
      - dn_rules.c: dn_fib_rule_action()
    - remove the following unneeded prototype:
      - dn_fib.c: dn_cache_dump()
    
    Signed-off-by: Adrian Bunk <bunk@stusta.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 77581416442f2e7d4fbe42cc3c7e5bfa52f9b5e8
Author: Andreas Mohr <andi@lisas.de>
Date:   Mon Aug 14 23:54:30 2006 -0700

    [TG3]: Constify firmware structs
    
    Constify largish areas of firmware data in Tigon3 ethernet driver.
    
    non-const:
    
    lsmod:
    tg3                   101404  0
    
    objdump -x:
    .rodata 000003e8
    .data 00004a0c
    
    ls -l:
    -rw-r--r-- 1 root root 114404 2006-08-19 21:36 drivers/net/tg3.ko
    
    const:
    
    lsmod:
    tg3                   101404  0
    
    objdump -x:
    .rodata 000042c8
    .data 00000b4c
    
    ls -l:
    -rw-r--r-- 1 root root 114532 2006-08-19 21:06 drivers/net/tg3.ko
    
    Signed-off-by: Andreas Mohr <andi@lisas.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit ca6aa60c7a2bee9632ba76118cf89c5890ddff30
Author: Adrian Bunk <bunk@stusta.de>
Date:   Mon Aug 14 23:49:16 2006 -0700

    [IPV6] ip6_fib.c: make code static
    
    Make the following needlessly global code static:
    - fib6_walker_lock
    - struct fib6_walker_list
    - fib6_walk_continue()
    - fib6_walk()
    
    Signed-off-by: Adrian Bunk <bunk@stusta.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 879f7b4f8e72e86e653e6331b6dac3f15206c9fe
Author: Steven Whitehouse <steve@chygwyn.com>
Date:   Fri Aug 11 16:44:18 2006 -0700

    [DECNET] Fix to decnet rules compare function
    
    Here is a fix to the DECnet rules compare function where we used 32bit
    values rather than 16bit values. Spotted by Patrick McHardy.
    
    Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 08497df5d8c3e93a3eace5365f501f0ae39aaaed
Author: Steven Whitehouse <steve@chygwyn.com>
Date:   Fri Aug 11 16:43:41 2006 -0700

    [DECNET] Fix to multiple tables routing
    
    Here is a fix to Patrick McHardy's increase number of routing tables
    patch for DECnet. I did just test this and it appears to be working
    fine with this patch.
    
    Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
    Acked-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 075f99a391c8e250f6a6590719ce158b9e582654
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Fri Aug 11 16:42:10 2006 -0700

    [NET] netdev: Check name length
    
    Some improvements to robust name interface.  These API's are safe
    now by convention, but it is worth providing some safety checks
    against future bugs.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 99c4451081b0ea2107ba4827f7d518e1c739cf1b
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:36:01 2006 -0700

    [HTB]: rbtree cleanup
    
    Add code to initialize rb tree nodes, and check for double deletion.
    This is not a real fix, but I can make it trap sometimes and may
    be a bandaid for: http://bugzilla.kernel.org/show_bug.cgi?id=6681
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 635278f19c325ddf0a67dfd3599e14a638498d80
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:35:38 2006 -0700

    [HTB]: Use hlist for hash lists.
    
    Use hlist instead of list for the hash list. This saves
    space, and we can check for double delete better.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9665a8537fd7bf75fd0af37be2bed150d93cf975
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:35:16 2006 -0700

    [HTB]: Lindent
    
    Code was a mess in terms of indentation.  Run through Lindent
    script, and cleanup the damage. Also, don't use, vim magic
    comment, and substitute inline for __inline__.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 831689203e655a35760ac9df20bcf5f593e62d3c
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:34:02 2006 -0700

    [HTB]: HTB_HYSTERESIS cleanup
    
    Change the conditional compilation around HTB_HYSTERSIS
    since code was splitting mid expression.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 4c4cbf4ef56bb93e0d6694466559f578f860e190
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:33:16 2006 -0700

    [HTB]: Remove lock macro.
    
    Get rid of the macro's being used to obscure the locking.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 4cd9252d6e674d80142e247651f8a715408f5101
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:31:08 2006 -0700

    [HTB]: Remove broken debug code.
    
    The HTB network scheduler had debug code that wouldn't compile
    and confused and obfuscated the code, remove it.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9b2731e8e54c71e921067a95424e2ad4cde79d7e
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:12:34 2006 -0700

    [NET]: Increate RT_TABLE_MAX to 2^32
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 80dfdb999739e452ee39f8bf3be06425bcd849af
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:11:47 2006 -0700

    [DECNET]: Increase number of possible routing tables to 2^32
    
    Increase the number of possible routing tables to 2^32 by replacing the
    fixed sized array of pointers by a hash table and replacing iterations
    over all possible table IDs by hash table walking.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 619ee759ad9bebf369b97e36485d4a519530a1c5
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:11:17 2006 -0700

    [IPV6]: Increase number of possible routing tables to 2^32
    
    Increase number of possible routing tables to 2^32 by replacing iterations
    over all possible table IDs by hash table walking.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 148a543ac4ade94a9a66598edf5ab211b6322dd1
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:10:46 2006 -0700

    [IPV4]: Increase number of possible routing tables to 2^32
    
    Increase the number of possible routing tables to 2^32 by replacing the
    fixed sized array of pointers by a hash table and replacing iterations
    over all possible table IDs by hash table walking.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit af30d06ec08f2cd02934706de1312291737c8b0d
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:09:48 2006 -0700

    [NET]: Introduce RTA_TABLE/FRA_TABLE attributes
    
    Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute
    to hold 32 bit routing table IDs. Usespace compatibility is provided by
    continuing to accept and send the rtm_table field, but because of its
    limited size it can only carry the low 8 bits of the table ID. This
    implies that if larger IDs are used, _all_ userspace programs using them
    need to use RTA_TABLE.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit f313cb6e9fd6617cae005e7fec06555dc7da23e4
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 23:08:33 2006 -0700

    [NET]: Use u32 for routing table IDs
    
    Use u32 for routing table IDs in net/ipv4 and net/decnet in preparation of
    support for a larger number of routing tables. net/ipv6 already uses u32
    everywhere and needs no further changes. No functional changes are made by
    this patch.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 366c8c5bf63302fb26c4cdf58cdd044d023374b4
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 23:03:23 2006 -0700

    [NEIGHBOUR]: Use ALIGN() macro.
    
    Rather than opencoding the mask, it looks better to use ALIGN()
    macro from kernel.h.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 17546e3433798d8a96f33f76720d7a249d15745a
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Thu Aug 10 00:22:41 2006 -0700

    [NET]: Kill double initialization in sock_alloc_inode.
    
    No need to set ei->socket.flags to zero twice.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit c2315078edbc87d6e99718b354c036456d42eed1
Author: Dave Jones <davej@redhat.com>
Date:   Wed Aug 9 21:04:21 2006 -0700

    [NET]: Remove unnecessary config.h includes from net/
    
    config.h is automatically included by kbuild these days.
    
    Signed-off-by: Dave Jones <davej@redhat.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit c5310742b663d01ae237ce70af9706ec081024f3
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Wed Aug 9 21:03:17 2006 -0700

    [NET]: sock_register interface changes
    
    The sock_register() doesn't change the family, so the protocols can
    define it read-only.  No caller ever checks return value from
    sock_unregister()
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit e5fd60049d0c3dfcda5620cf0549ab34cc4a31b8
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Wed Aug 9 21:02:10 2006 -0700

    [NET]: socket family using RCU
    
    Replace the gross custom locking done in socket code for net_family[]
    with simple RCU usage. Some reordering necessary to avoid sleep issues
    with sock_alloc.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 391e177c7ea973bfd0a7ead8545f3149311f3cbd
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Wed Aug 9 20:50:00 2006 -0700

    [NET]: drop unused elements from net_proto_family
    
    Three values in net_proto_family are defined but never used.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 079374f7ca697bf2b9187d8d42c280678c0e44a9
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 21:28:26 2006 -0700

    [NET] socket: code style cleanup
    
    Make socket.c conform to current style:
    	* run through Lindent
    	* get rid of unneeded casts
    	* split assignment and comparsion where possible
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 49d2f8a14678d63ad48fec6471e62c56040c19bf
Author: Sridhar Samudrala <sri@us.ibm.com>
Date:   Wed Aug 9 17:03:17 2006 -0700

    [SUNRPC]: Remove the unnecessary check for highmem in xs_sendpages().
    
    Just call kernel_sendpage() directly.
    
    Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit ec7c40e0afba04e6341a25999b01ef770a56564a
Author: Steven Whitehouse <steve@chygwyn.com>
Date:   Wed Aug 9 16:00:57 2006 -0700

    [DECNET]: Convert rwlock to spinlock
    
    As per Stephen Hemminger's recent patch to ipv4/fib_semantics.c this
    is the same change but for DECnet.
    
    Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9768bedab00fe52626c8dde3238719fceab59054
Author: Steven Whitehouse <steve@chygwyn.com>
Date:   Wed Aug 9 15:56:46 2006 -0700

    [DECNET]: Covert rules to use generic code
    
    This patch converts the DECnet rules code to use the generic
    rules system created by Thomas Graf <tgraf@suug.ch>.
    
    Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
    Acked-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 749fca28501a974d5432e63867a8a07f9363eb04
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Wed Aug 9 15:47:12 2006 -0700

    [IPV4]: Use network-order dport for all visible inet_lookup_*
    
    Right now most inet_lookup_* functions take a host-order hnum instead
    of a network-order dport because that's how it is represented
    internally.
    
    This means that users of these functions have to be careful about
    using the right byte-order.  To add more confusion, inet_lookup takes
    a network-order dport unlike all other functions.
    
    So this patch changes all visible inet_lookup functions to take a
    dport and move all dport->hnum conversion inside them.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 891833208e1620681369f6a98155c8bab813c4c6
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Tue Aug 8 16:51:52 2006 -0700

    [IPV4] fib: convert reader/writer to spinlock
    
    Ther is no point in using a more expensive reader/writer lock
    for a low contention lock like the fib_info_lock. The only
    reader case is in handling route redirects.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 36c1a854b9b825018d4c3868c654be547cdd67b5
Author: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date:   Tue Aug 8 16:44:17 2006 -0700

    [IPV6]: Make sure fib6_rule_lookup doesn't return NULL
    
    The callers of fib6_rule_lookup don't expect it to return NULL,
    therefore it must return ip6_null_entry whenever fib_rule_lookup fails.
    
    Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 5be8c453882270207b71b7ad503e85307aaa9409
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue Aug 8 02:18:10 2006 -0700

    [IPV4]: Uninline inet_lookup_listener
    
    By modern standards this function is way too big to be inlined.  It's
    even bigger than __inet_lookup_listener :)
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 3dad72203a2132b9a639321a9fc452603dec6f53
Author: Louis Nyffenegger <louis.nyffenegger@gmail.com>
Date:   Tue Aug 8 00:56:11 2006 -0700

    [INET]: Remove is_setbyuser patch
    
    The value is_setbyuser from struct ip_options is never used and set
    only one time (http://linux-net.osdl.org/index.php/TODO#IPV4).
    This little patch removes it from the kernel source.
    
    Signed-off-by: Louis Nyffenegger <louis.nyffenegger@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 5e83ed21e615d5cf43d135b54cf72cffa0c66bd4
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Mon Aug 7 21:56:52 2006 -0700

    [IPV4]: Kill fib4_rules_clean().
    
    As noted by Adrian Bunk this function is totally unused.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 29f8d6ed5c507be037b1dc6605122220226f8579
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Mon Aug 7 21:54:37 2006 -0700

    [IPV6]: Protect RTM_GETRULE table entry with IPV6_MULTIPLE_TABLES ifdef
    
    This is how IPv4 handles this case too.
    
    Based upon a patch from Andrew Morton.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 51ecaeba29979877caada6e5066ff29773139934
Author: Adrian Bunk <bunk@stusta.de>
Date:   Mon Aug 7 21:50:48 2006 -0700

    [NET]: Make code static.
    
    This patch makes needlessly global code static.
    
    Signed-off-by: Adrian Bunk <bunk@stusta.de>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 85dc8f37f5fb41b1e8e431ff6d46005ab9221419
Author: Sridhar Samudrala <sri@us.ibm.com>
Date:   Mon Aug 7 20:58:01 2006 -0700

    [SUNRPC]: Update to use in-kernel sockets API.
    
    Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
    Acked-by: James Morris <jmorris@namei.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 6db6eec160a668a6313017bde8086ea7cab92a64
Author: Sridhar Samudrala <sri@us.ibm.com>
Date:   Mon Aug 7 20:57:31 2006 -0700

    [NET]: Round out in-kernel sockets API
    
    This patch implements wrapper functions that provide a convenient way
    to access the sockets API for in-kernel users like sunrpc, cifs &
    ocfs2 etc and any future users.
    
    Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
    Acked-by: James Morris <jmorris@namei.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit bb8d56287f3667e2371d92e67db5813dc45ce12d
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 18:00:57 2006 -0700

    [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h
    
    rtnetlink_rcv_msg() is not longer required to parse attributes
    for the neighbour tables layer, remove dependency on obsolete and
    buggy rta_buf.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit e5b035ff183d787193982f6c8f69b98d624c1362
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 18:00:18 2006 -0700

    [NEIGH]: Convert neighbour table dumping to new netlink api
    
    Also fixes skipping of already dumped neighbours.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit ab5f0a7ea59f2a8808991233f63cb3f6cf4bce48
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 17:58:53 2006 -0700

    [NEIGH]: Convert neighbour table modification to new netlink api
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit cc9bc1e23ca4c758945c32879c6223730ab9265c
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 17:57:44 2006 -0700

    [NEIGH]: Move netlink neighbour bits to linux/neighbour.h
    
    Moves netlink neighbour bits to linux/neighbour.h. Also
    moves bits to be exported to userspace from net/neighbour.h
    to linux/neighbour.h and removes __KERNEL__ guards, userspace
    is not supposed to be using it.
    
    rtnetlink_rcv_msg() is not longer required to parse attributes
    for the neighbour layer, remove dependency on obsolete and
    buggy rta_buf.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 139c0cbc532db068167cad67f7f0f4da6f5a1d11
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 17:56:37 2006 -0700

    [NEIGH]: Convert neighbour dumping to new netlink api
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 1ffd2eb7cffeb90bab4bdeadb00678bae7dd1f9e
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 17:55:40 2006 -0700

    [NEIGH]: Convert neighbour addition to new netlink api
    
    Fixes:
        Return EAFNOSUPPORT if no table matches the specified
        address family.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 907941292894917111a28de0cd4bd8609235159e
Author: Thomas Graf <tgraf@suug.ch>
Date:   Mon Aug 7 17:53:08 2006 -0700

    [NEIGH]: Convert neighbour deletion to new netlink api
    
    Fixes:
      Return ENOENT if the neighbour is not found (was EINVAL)
      Return EAFNOSUPPORT if no table matches the specified
      address family.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit a91996465869c662d263eb1cf119da60260666af
Author: Patrick McHardy <kaber@trash.net>
Date:   Sun Aug 6 22:24:08 2006 -0700

    [IPV6]: Fix policy routing lookup
    
    When the lookup in a table returns ip6_null_entry the policy routing lookup
    returns it instead of continuing in the next table, which effectively means
    it only searches the local table.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 754824d8e3453fef8db6aec4b2af469b4ab02e40
Author: Patrick McHardy <kaber@trash.net>
Date:   Sun Aug 6 22:22:47 2006 -0700

    [IPV6]: Fix crash in ip6_del_rt
    
    ip6_null_entry doesn't have rt6i_table set, when trying to delete it the
    kernel crashes dereferencing table->tb6_lock.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit dc7be3e936712eb0bdf60068f279d330788ff5e4
Author: Patrick McHardy <kaber@trash.net>
Date:   Sat Aug 5 02:20:42 2006 -0700

    [IPV6]: Fix thinko in rt6_fill_node
    
    This looks like a mistake, the table ID is overwritten again.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 5d13cbef2a379a1ddd056f306b4afa2041278563
Author: Patrick McHardy <kaber@trash.net>
Date:   Sat Aug 5 00:58:52 2006 -0700

    [NETFILTER]: nf_queue: handle GSO packets
    
    Handle GSO packets in nf_queue by segmenting them before queueing to
    avoid breaking GSO in case they get mangled.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 434b190ce4ce2b1ab0008fff61312acba0026cdf
Author: Patrick McHardy <kaber@trash.net>
Date:   Sat Aug 5 00:58:33 2006 -0700

    [NETFILTER]: Get rid of HW checksum invalidation
    
    Update hardware checksums incrementally to avoid breaking GSO.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 308e125410020b5e9367973db48b1256a4c9b4d5
Author: Patrick McHardy <kaber@trash.net>
Date:   Sat Aug 5 00:57:21 2006 -0700

    [NET]: Replace CHECKSUM_HW by CHECKSUM_PARTIAL/CHECKSUM_COMPLETE
    
    Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose
    checksum still needs to be completed) and CHECKSUM_COMPLETE (for
    incoming packets, device supplied full checksum).
    
    Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 2c092d171a04a79e15f4620c2f975006ca7d981f
Author: Patrick McHardy <kaber@trash.net>
Date:   Sat Aug 5 00:56:16 2006 -0700

    [NETFILTER]: netbios conntrack: fix compile
    
    Fix compile breakage caused by move of IFA_F_SECONDARY to new header
    file.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit cd58d78e404dd97196b9df4baee5e214e26558d1
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:05:56 2006 -0700

    [NET]: Move netlink interface bits to linux/if.h
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 35c377d97d5d2a09fefec04a604b4176b707c485
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:05:34 2006 -0700

    [NET]: Convert link dumping to new netlink api
    
    Transforms netlink code to dump link tables to use the new
    netlink api. Makes rtnl_getlink() available regardless of the
    availability of the wireless extensions.
    
    Adding copy_rtnl_link_stats() avoids the structural dependency
    of struct rtnl_link_stats on struct net_device_stats and thus
    avoids troubles later on.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 948dba63cae2e8fc57744d42a315a8702250fb64
Author: Thomas Graf <tgraf@suug.ch>
Date:   Thu Aug 10 21:17:37 2006 -0700

    [NET]: Convert link modification to new netlink api
    
    Transforms do_setlink() into rtnl_setlink() using the new
    netlink api. A warning message printed to the console is
    added in the event that a change request fails while part
    of the change request has been comitted already. The ioctl()
    based nature of net devices makes it almost impossible to
    move on to atomic netlink operations without obsoleting
    some of the functionality.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 7a830ef0212775fa285e2d8bd063be2d6c79da3a
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:04:54 2006 -0700

    [IPv4]: Move interface address bits to linux/if_addr.h
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 5b9ebf51e9fa211f5b0fe6b7298537c52021a93f
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:04:36 2006 -0700

    [IPV4]: Convert address dumping to new netlink api
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 2da351d28edf166312eb2c39a4d997d9a75e4d43
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:04:17 2006 -0700

    [IPV4]: Convert address deletion to new netlink api
    
    Fixes various unvalidated netlink attributes causing
    memory corruptions when left empty by userspace.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 8fdf17713dcf0aca92bc39e2596f9ff991af04da
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:03:53 2006 -0700

    [IPV4]: Convert address addition to new netlink api
    
    Adds rtm_to_ifaddr() transforming a netlink message to a
    struct in_ifaddr. Fixes various unvalidated netlink attributes
    causing memory corruptions when left empty by userspace
    applications.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit a024f6fdb2258786cbf5ec1bf6cb4289bfcddfc9
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:03:29 2006 -0700

    [NETLINK]: Convert core netlink handling to new netlink api
    
    Fixes a theoretical memory and locking leak when the size of
    the netlink header would exceed the skb tailroom.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9b9287a56536982c33b0990f0a5945bf3010450b
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:03:05 2006 -0700

    [NETLINK]: Extend netlink messaging interface
    
    Adds:
     nlmsg_get_pos()                 return current position in message
     nlmsg_trim()                    trim part of message
     nla_reserve_nohdr(skb, len)     reserve room for an attribute w/o hdr
     nla_put_nohdr(skb, len, data)   add attribute w/o hdr
     nla_find_nested()               find attribute in nested attributes
    
    Fixes nlmsg_new() to take allocation flags and consider size.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 381034695c7d0656450b6720479196ea56fc0cc3
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 03:39:22 2006 -0700

    [IPV4]: Use Protocol Independant Policy Routing Rules Framework
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit ee0df4b61b1dc7eb53192e3f91d75f581fdf06eb
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 03:39:02 2006 -0700

    [IPV6]: Policy Routing Rules
    
    Adds support for policy routing rules including a new
    local table for routes with a local destination.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit d7853be710629717e112021eca4258a505dc2ca8
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 03:38:38 2006 -0700

    [NET]: Protocol Independant Policy Routing Rules Framework
    
    Derived from net/ipv/fib_rules.c
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit a1fded43a8cab055032226f96c86a61a23d2c6bc
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 23:20:06 2006 -0700

    [IPV6]: Multiple Routing Tables
    
    Adds the framework to support multiple IPv6 routing tables.
    Currently all automatically generated routes are put into the
    same table. This could be changed at a later point after
    considering the produced locking overhead.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 2ce9a882a4a4a869b12c8052097ccb89aae7220f
Author: Thomas Graf <tgraf@suug.ch>
Date:   Fri Aug 4 03:37:36 2006 -0700

    [IPV6]: Remove ndiscs rt6_lock dependency
    
    (Ab)using rt6_lock wouldn't work anymore if rt6_lock is
    converted into a per table lock.
    
    Signed-off-by: Thomas Graf <tgraf@suug.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 88975d92e738071ee9f34733e92a5840b9a4d38b
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:50:39 2006 -0700

    [NetLabel]: tie NetLabel into the Kconfig system
    
    Modify the net/Kconfig file to enable selecting the NetLabel Kconfig
    options.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit d9b8c6bfbfcf5356b19e682ae7c235df28daed1e
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Fri Aug 4 23:17:57 2006 -0700

    [NetLabel]: SELinux support
    
    Add NetLabel support to the SELinux LSM and modify the
    socket_post_create() LSM hook to return an error code.  The most
    significant part of this patch is the addition of NetLabel hooks into
    the following SELinux LSM hooks:
    
     * selinux_file_permission()
     * selinux_socket_sendmsg()
     * selinux_socket_post_create()
     * selinux_socket_sock_rcv_skb()
     * selinux_socket_getpeersec_stream()
     * selinux_socket_getpeersec_dgram()
     * selinux_sock_graft()
     * selinux_inet_conn_request()
    
    The basic reasoning behind this patch is that outgoing packets are
    "NetLabel'd" by labeling their socket and the NetLabel security
    attributes are checked via the additional hook in
    selinux_socket_sock_rcv_skb().  NetLabel itself is only a labeling
    mechanism, similar to filesystem extended attributes, it is up to the
    SELinux enforcement mechanism to perform the actual access checks.
    
    In addition to the changes outlined above this patch also includes
    some changes to the extended bitmap (ebitmap) and multi-level security
    (mls) code to import and export SELinux TE/MLS attributes into and out
    of NetLabel.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 7a5c51ddde0cef9f97e776c32d05a29037a8259e
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:48:59 2006 -0700

    [NetLabel]: CIPSOv4 and Unlabeled packet integration
    
    Add CIPSO/IPv4 and unlabeled packet management to the NetLabel
    subsystem.  The CIPSO/IPv4 changes allow the configuration of
    CIPSO/IPv4 within the overall NetLabel framework.  The unlabeled
    packet changes allows NetLabel to pass unlabeled packets without
    error.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 625eeb119b7939eddf39486352867e8af2897f99
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:48:37 2006 -0700

    [NetLabel]: core NetLabel subsystem
    
    Add a new kernel subsystem, NetLabel, to provide explicit packet
    labeling services (CIPSO, RIPSO, etc.) to LSM developers.  NetLabel is
    designed to work in conjunction with a LSM to intercept and decode
    security labels on incoming network packets as well as ensure that
    outgoing network packets are labeled according to the security
    mechanism employed by the LSM.  The NetLabel subsystem is configured
    through a Generic NETLINK interface described in the header files
    included in this patch.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 4a4478fa3fe5425768e84c6cc02f930ba1151b48
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:48:06 2006 -0700

    [NetLabel]: CIPSOv4 engine
    
    Add support for the Commercial IP Security Option (CIPSO) to the IPv4
    network stack.  CIPSO has become a de-facto standard for
    trusted/labeled networking amongst existing Trusted Operating Systems
    such as Trusted Solaris, HP-UX CMW, etc.  This implementation is
    designed to be used with the NetLabel subsystem to provide explicit
    packet labeling to LSM developers.
    
    The CIPSO/IPv4 packet labeling works by the LSM calling a NetLabel API
    function which attaches a CIPSO label (IPv4 option) to a given socket;
    this in turn attaches the CIPSO label to every packet leaving the
    socket without any extra processing on the outbound side.  On the
    inbound side the individual packet's sk_buff is examined through a
    call to a NetLabel API function to determine if a CIPSO/IPv4 label is
    present and if so the security attributes of the CIPSO label are
    returned to the caller of the NetLabel API function.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit bcfc1ddec2ed2bddaa9c21648a169593b10c46e7
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:46:20 2006 -0700

    [NetLabel]: core network changes
    
    Changes to the core network stack to support the NetLabel subsystem.  This
    includes changes to the IPv4 option handling to support CIPSO labels.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit cca62386473eeb4aa6b4c3c1801866f65ff02c2f
Author: Paul Moore <paul.moore@hp.com>
Date:   Thu Aug 3 16:45:49 2006 -0700

    [NetLabel]: documentation
    
    Documentation for the NetLabel system, this includes a basic overview
    of how NetLabel works, how LSM developers can integrate it into their
    favorite LSM, as well as documentation on the CIPSO related sysctl
    variables.  Also, due to the difficulty of finding expired IETF
    drafts, I am including the IETF CIPSO draft that is the basis of the
    NetLabel CIPSO implementation.
    
    Signed-off-by: Paul Moore <paul.moore@hp.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 0db7389dc39868aabf79831f3ec3fd8a97645899
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Thu Jul 27 22:01:34 2006 -0700

    [MLSXFRM]: Fix build with SECURITY_NETWORK_XFRM disabled.
    
    The following patch will fix the build problem (encountered by Andrew
    Morton) when SECURITY_NETWORK_XFRM is not enabled.
    
    As compared to git-net-selinux_xfrm_decode_session-build-fix.patch in
    -mm, this patch sets the return parameter sid to SECSID_NULL in
    selinux_xfrm_decode_session() and handles this value in the caller
    selinux_inet_conn_request() appropriately.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Acked-by: James Morris <jmorris@namei.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 214dfb98764e3985c17f99ea3d8c94a442b0ff69
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:32:50 2006 -0700

    [MLSXFRM]: Auto-labeling of child sockets
    
    This automatically labels the TCP, Unix stream, and dccp child sockets
    as well as openreqs to be at the same MLS level as the peer. This will
    result in the selection of appropriately labeled IPSec Security
    Associations.
    
    This also uses the sock's sid (as opposed to the isec sid) in SELinux
    enforcement of secmark in rcv_skb and postroute_last hooks.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 81d7158098f9d4c4c1663986c1526952e344c9ba
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:32:20 2006 -0700

    [MLSXFRM]: Default labeling of socket specific IPSec policies
    
    This defaults the label of socket-specific IPSec policies to be the
    same as the socket they are set on.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9e7a66e362d650bb3526e9b58a75cdc67b42b29f
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Fri Aug 4 23:12:42 2006 -0700

    [MLSXFRM]: Add flow labeling
    
    This labels the flows that could utilize IPSec xfrms at the points the
    flows are defined so that IPSec policy and SAs at the right label can
    be used.
    
    The following protos are currently not handled, but they should
    continue to be able to use single-labeled IPSec like they currently
    do.
    
    ipmr
    ip_gre
    ipip
    igmp
    sit
    sctp
    ip6_tunnel (IPv6 over IPv6 tunnel device)
    decnet
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 054bd89016d3ec98791cd431da5b42fe943b1dae
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:31:14 2006 -0700

    [MLSXFRM]: Add security context to acquire messages using PF_KEY
    
    This includes the security context of a security association created
    for use by IKE in the acquire messages sent to IKE daemons using
    PF_KEY. This would allow the daemons to include the security context
    in the negotiation, so that the resultant association is unique to
    that security context.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 93f1dc8ced88a418ec810d27dd969eaef922598d
Author: Serge Hallyn <serue@us.ibm.com>
Date:   Mon Jul 24 23:30:44 2006 -0700

    [MLSXFRM]: Add security context to acquire messages using netlink
    
    This includes the security context of a security association created
    for use by IKE in the acquire messages sent to IKE daemons using
    netlink/xfrm_user. This would allow the daemons to include the
    security context in the negotiation, so that the resultant association
    is unique to that security context.
    
    Signed-off-by: Serge Hallyn <serue@us.ibm.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 4927ea61bbc77dcd8886094774eb6871d478ffc2
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:29:07 2006 -0700

    [MLSXFRM]: Flow based matching of xfrm policy and state
    
    This implements a seemless mechanism for xfrm policy selection and
    state matching based on the flow sid. This also includes the necessary
    SELinux enforcement pieces.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit e8066b907bf03920e2e1b22961f8ff96e0796f98
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:28:37 2006 -0700

    [MLSXFRM]: Add security sid to flowi
    
    This adds security to flow key for labeling of flows as also to allow
    for making flow cache lookups based on the security label seemless.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 278571ff86b85d663d9e64d2f7611fa27f384ebb
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Fri Aug 4 23:08:56 2006 -0700

    [MLSXFRM]: Add security sid to sock
    
    This adds security for IP sockets at the sock level. Security at the
    sock level is needed to enforce the SELinux security policy for
    security associations even when a sock is orphaned (such as in the TCP
    LAST_ACK state).
    
    This will also be used to enforce SELinux controls over data arriving
    at or leaving a child socket while it's still waiting to be accepted.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit a02d86b11b5dec8beeeed7258f59a496e8724425
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:27:16 2006 -0700

    [MLSXFRM]: Define new SELinux service routine
    
    This defines a routine that combines the Type Enforcement portion of
    one sid with the MLS portion from the other sid to arrive at a new
    sid. This would be used to define a sid for a security association
    that is to be negotiated by IKE as well as for determing the sid for
    open requests and connection-oriented child sockets.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 178f33c0a9e746bbe6fc7066ef58b754b45a1784
Author: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date:   Mon Jul 24 23:26:30 2006 -0700

    [MLSXFRM]: Granular IPSec associations for use in MLS environments
    
    The current approach to labeling Security Associations for SELinux
    purposes uses a one-to-one mapping between xfrm policy rules and
    security associations.
    
    This doesn't address the needs of real world MLS (Multi-level System,
    traditional Bell-LaPadula) environments where a single xfrm policy
    rule (pertaining to a range, classified to secret for example) might
    need to map to multiple Security Associations (one each for
    classified, secret, top secret and all the compartments applicable to
    these security levels).
    
    This patch set addresses the above problem by allowing for the mapping
    of a single xfrm policy rule to multiple security associations, with
    each association used in the security context it is defined for. It
    also includes the security context to be used in IKE negotiation in
    the acquire messages sent to the IKE daemon so that a unique SA can be
    negotiated for each unique security context. A couple of bug fixes are
    also included; checks to make sure the SAs used by a packet match
    policy (security context-wise) on the inbound and also that the bundle
    used for the outbound matches the security context of the flow. This
    patch set also makes the use of the SELinux sid in flow cache lookups
    seemless by including the sid in the flow key itself. Also, open
    requests as well as connection-oriented child sockets are labeled
    automatically to be at the same level as the peer to allow for use of
    appropriately labeled IPSec associations.
    
    Description of changes:
    
    A "sid" member has been added to the flow cache key resulting in the
    sid being available at all needed locations and the flow cache lookups
    automatically using the sid. The flow sid is derived from the socket
    on the outbound and the SAs (unlabeled where an SA was not used) on
    the inbound.
    
    Outbound case:
    1. Find policy for the socket.
    
    2. OLD: Find an SA that matches the policy.
     NEW: Find an SA that matches BOTH the policy and the flow/socket.
       This is necessary since not every SA that matches the policy
       can be used for the flow/socket. Consider policy range Secret-TS,
       and SAs each for Secret and TS. We don't want a TS socket to
       use the Secret SA. Hence the additional check for the SA Vs. flow/socket.
    
    3. NEW: When looking thru bundles for a policy, make sure the
            flow/socket can use the bundle. If a bundle is not found,
            create one, calling for IKE if necessary. If using IKE,
            include the security context in the acquire message to the IKE
            daemon.
    
    Inbound case:
    1. OLD: Find policy for the socket.
     NEW: Find policy for the incoming packet based on the sid of the
          SA(s) it used or the unlabeled sid if no SAs were
          used. (Consider a case where a socket is "authorized" for two
          policies (unclassified-confidential, secret-top_secret). If the
          packet has come in using a secret SA, we really ought to be
          using the latter policy (secret-top_secret).)
    
    2. OLD: BUG: No check to see if the SAs used by the packet agree with
                 the policy sec_ctx-wise.
    
                 (It was indicated in selinux_xfrm_sock_rcv_skb() that
                  this was being accomplished by
                  (x->id.spi == tmpl->id.spi || !tmpl->id.spi) in xfrm_state_ok,
    	      but it turns out tmpl->id.spi
                  would normally be zero (unless xfrm policy rules specify one
                  at the template level, which they usually don't).
     NEW: The socket is checked for access to the SAs used (based on the
          sid of the SAs) in selinux_xfrm_sock_rcv_skb().
    
    Forward case:
     This would be Step 1 from the Inbound case, followed by Steps 2 and 3
    from the Outbound case.
    
    Outstanding items/issues:
    
    - Timewait acknowledgements and such are generated in the
      current/upstream implementation using a NULL socket resulting in the
      any_socket sid (SYSTEM_HIGH) to be used. This problem is not addressed
      by this patch set.
    
    This patch: Add new flask definitions to SELinux
    
    Adds a new avperm "polmatch" to arbitrate flow/state access to a xfrm
    policy rule.
    
    Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 96e0492cdf6ae173d8177ac66e7a734694521d7b
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Thu Aug 10 17:35:45 2006 -0700

    [LLC]: multicast receive device match
    
    Fix from Aji_Srinivas@emc.com, STP packets are incorrectly received on
    all LLC datagram sockets, whichever interface they are bound to.  The
    llc_sap datagram receive logic sends packets with a unicast
    destination MAC to one socket bound to that SAP and MAC, and multicast
    packets to all sockets bound to that SAP. STP packets are multicast,
    and we do need to know on which interface they were received.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 69ca4d01d1a789243ae9d8db45fdd1c033c9cb5c
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Thu Aug 10 17:28:09 2006 -0700

    [XFRM]: Revert bogus change to xfrm_dst_check().
    
    As noticed by Herbert Xu, the change made to xfrm_dst_check() in:
    
    399c180ac5f0cb66ef9479358e0b8b6bafcbeafe
    
    to make xfrm_dst_check() always return NULL is bogus, because the way
    xfrm4_policy.c works is to set dst->obsolete to -1 so that every use
    validates the route.
    
    In effect, this essentially disabled xfrm_dst caching altogether.
    
    So put back the old code which checks things out using stale_bundle().
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit f811c65a565c47b9d3198cf89f7536a2642c170f
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Aug 10 16:55:56 2006 -0700

    [NETFILTER]: xt_hashlimit: fix limit off-by-one
    
    Hashlimit doesn't account for the first packet, which is inconsistent
    with the limit match.
    
    Reported by ryan.castellucci@gmail.com, netfilter bugzilla #500.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit eb65b29458b08c9cb99f29de82768c91dfa56288
Author: Phil Oester <kernel@linuxace.com>
Date:   Thu Aug 10 16:55:28 2006 -0700

    [NETFILTER]: xt_string: fix negation
    
    The xt_string match is broken with ! negation.
    This resolves a portion of netfilter bugzilla #497.
    
    Signed-off-by: Phil Oester <kernel@linuxace.com>
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit a7fc5b24a4921a6582ce47c0faf3a31858a80468
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Thu Aug 10 16:53:33 2006 -0700

    [TCP]: Fix botched memory leak fix to tcpprobe_read().
    
    Somehow I clobbered James's original fix and only my
    subsequent compiler warning change went in for that
    changeset.
    
    Get the real fix in there.
    
    Noticed by Jesper Juhl.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 0e1edbd99994270023cea5afe593f972eb09a778
Author: Nathan Scott <nathans@sgi.com>
Date:   Thu Aug 10 14:40:41 2006 +1000

    [XFS] Fix xfs_free_extent related NULL pointer dereference.
    
    We recently fixed an out-of-space deadlock in XFS, and part of that fix
    involved the addition of the XFS_ALLOC_FLAG_FREEING flag to some of the
    space allocator calls to indicate they're freeing space, not allocating
    it. There was a missed xfs_alloc_fix_freelist condition test that did not
    correctly test "flags". The same test would also test an uninitialised
    structure field (args->userdata) and depending on its value either would
    or would not return early with a critical buffer pointer set to NULL.
    
    This fixes that up, adds asserts to several places to catch future botches
    of this nature, and skips sections of xfs_alloc_fix_freelist that are
    irrelevent for the space-freeing case.
    
    SGI-PV: 955303
    SGI-Modid: xfs-linux-melb:xfs-kern:26743a
    
    Signed-off-by: Nathan Scott <nathans@sgi.com>

commit fff642570dc47ab76491fe81ee6599269c4eb13e
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Wed Aug 9 17:36:15 2006 -0700

    [IPX]: Fix typo, ipxhdr() --> ipx_hdr()
    
    Noticed by Dave Jones.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 06aebfb7faa13258af5230ff3d1587ece6c0250e
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Wed Aug 9 16:52:04 2006 -0700

    [IPV6]: The ifa lock is a BH lock
    
    The ifa lock is expected to be taken in BH context (by addrconf timers)
    so we must disable BH when accessing it from user context.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 79860a9a79a17f1bed0713680a3995467405ba49
Author: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date:   Wed Aug 9 15:04:58 2006 +0100

    [PATCH] PATCH: 2.6.18 oops on boot fix for IDE
    
    When the IDE fix for Jmicron went in one piece went walking somewhere
    (send log shows my end somehow). Without this sometimes you get an oops
    on boot.
    
    Signed-off-by: Alan Cox <alan@redhat.com>
    Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

commit 7c91767a6b701543c93ebcd611dab61deff3dad1
Author: Dmitry Mishin <dim@openvz.org>
Date:   Wed Aug 9 02:25:54 2006 -0700

    [NET]: add_timer -> mod_timer() in dst_run_gc()
    
    Patch from Dmitry Mishin <dim@openvz.org>:
    
    Replace add_timer() by mod_timer() in dst_run_gc
    in order to avoid BUG message.
    
           CPU1                            CPU2
    dst_run_gc()  entered           dst_run_gc() entered
    spin_lock(&dst_lock)                   .....
    del_timer(&dst_gc_timer)         fail to get lock
           ....                         mod_timer() <--- puts
                                                     timer back
                                                     to the list
    add_timer(&dst_gc_timer) <--- BUG because timer is in list already.
    
    Found during OpenVZ internal testing.
    
    At first we thought that it is OpenVZ specific as we
    added dst_run_gc(0) call in dst_dev_event(),
    but as Alexey pointed to me it is possible to trigger
    this condition in mainstream kernel.
    
    F.e. timer has fired on CPU2, but the handler was preeempted
    by an irq before dst_lock is tried.
    Meanwhile, someone on CPU1 adds an entry to gc list and
    starts the timer.
    If CPU2 was preempted long enough, this timer can expire
    simultaneously with resuming timer handler on CPU1, arriving
    exactly to the situation described.
    
    Signed-off-by: Dmitry Mishin <dim@openvz.org>
    Signed-off-by: Kirill Korotaev <dev@openvz.org>
    Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 22aac0896b1b0b8cabaf00714c55dd12f25d5738
Author: Tejun Heo <htejun@gmail.com>
Date:   Tue Aug 8 14:08:59 2006 +0900

    [PATCH] libata: clear sdev->locked on door lock failure
    
    SCSI EH locks door if sdev->locked is set.  Sometimes door lock
    command fails continuously (e.g. when medium is not present) and as
    libata uses EH to acquire sense data, this easily creates a loop where
    a failed lock door invokes EH and EH issues lock door on completion.
    
    This patch clears sdev->locked on door lock failure to break this
    loop.  This problem has been spotted and diagnosed by Unicorn Chang
    <uchang@tw.ibm.com>.
    
    Signed-off-by: Tejun Heo <htejun@gmail.com>
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

commit 85455dd34219376dcc7dce94bea67058f0b7d731
Author: Keith Owens <kaos@ocs.com.au>
Date:   Tue Aug 8 13:51:02 2006 +1000

    [PATCH] Fix compile problem when sata debugging is on
    
    Fix a sata debug print statement that still uses an old variable name.
    
    Signed-off-by: Keith Owens <kaos@ocs.com.au>
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

commit 013b68bf7cb4edfb8753aac9bde3cb60c84bc067
Author: Brice Goglin <brice@myri.com>
Date:   Wed Aug 9 00:07:53 2006 -0400

    [PATCH] myri10ge: always re-enable dummy rdmas in myri10ge_resume
    
    Dummy RDMA are always enabled on device startup since commit
    9a71db721a2cbb9921b929b2699ab181f5a3c6c0 (to work around buggy
    PCIe chipsets which do not implement resending properly). But,
    we also need to always re-enable them when resuming the device.
    
    Signed-off-by: Brice Goglin <brice@myri.com>
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

commit 7b1ba8de569460894efa892457af7a37c0d574f9
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Tue Aug 8 16:48:51 2006 -0700

    [IPX]: Another nonlinear receive fix
    
    Need to check some more cases in IPX receive.  If the skb is purely
    fragments, the IPX header needs to be extracted. The function
    pskb_may_pull() may in theory invalidate all the pointers in the skb,
    so references to ipx header must be refreshed.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 70f8e78e150425b01c1099087ad3decacf7e4ccf
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Tue Aug 8 16:47:37 2006 -0700

    [RTNETLINK]: Fix IFLA_ADDRESS handling.
    
    The ->set_mac_address handlers expect a pointer to a
    sockaddr which contains the MAC address, whereas
    IFLA_ADDRESS provides just the MAC address itself.
    
    So whip up a sockaddr to wrap around the netlink
    attribute for the ->set_mac_address call.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 0851fb48486c044476b019029245cb9b348fca33
Author: Yeasah Pell <yeasah@schwide.com>
Date:   Tue Aug 8 09:10:18 2006 -0300

    V4L/DVB (4431): Add several error checks to dst
    
    Signed-off-by: Yeasah Pell <yeasah@shwide.com>
    Signed-off-by: Manu Abraham <manu@linuxtv.org>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit eafa98177a388700d613c187c7716e11869b7171
Author: Diego Calleja <diegocg@gmail.com>
Date:   Tue Aug 8 09:10:17 2006 -0300

    V4L/DVB (4430): Quickcam_messenger compilation fix
    
    In bugzilla #6943, Maxim Britov reported:
    "I can enable Logitech quickcam support in .config, but it want be compile.
    I have to add into drivers/media/video/Makefile:
    obj-$(CONFIG_USB_QUICKCAM_MESSENGER)    += usbvideo/"
    He's right, just enable that driver as module while disabling every other
    driver that gets into that directory, nothing will get compiled.
    This patch fixes the Makefile.
    
    Signed-off-by: Diego Calleja <diegocg@gmail.com>
    Acked-by: Michael Krufky <mkrufky@linuxtv.org>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 8a016dcc8383bedc731bca9fdedae77ff52a6ad7
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Aug 8 09:10:16 2006 -0300

    V4L/DVB (4427): Fix V4L1 Compat for VIDIOCGPICT ioctl
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 43c560fa59ef2688d891f6431e972b7763b73a00
Author: Hans Verkuil <hverkuil@xs4all.nl>
Date:   Tue Aug 8 09:10:15 2006 -0300

    V4L/DVB (4419): Turn on the Low Noise Amplifier of the Samsung tuners.
    
    Without the LNA these tuners perform very poorly (read 'unwatchable') when
    the signal is weak.
    
    Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit e335fada5afef0658038b6bec432d37f332da9fc
Author: Hans Verkuil <hverkuil@xs4all.nl>
Date:   Tue Aug 8 09:10:15 2006 -0300

    V4L/DVB (4418): Fix broken msp3400 module option 'standard'
    
    Due to a wrong statement order the 'standard' module option didn't
    work for 'G' model chips.
    
    Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 17531c168c7b8138b5379a85de788798fc6696e1
Author: Hans Verkuil <hverkuil@xs4all.nl>
Date:   Tue Aug 8 09:10:12 2006 -0300

    V4L/DVB (4416): Cx25840_read4 has wrong endianness.
    
    cx25840_read4 assembled the bytes in the wrong order.
    
    Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit b02dc515914bc37f5e406cc6f622fc06991eb779
Author: Trent Piepho <xyzzy@speakeasy.org>
Date:   Tue Aug 8 09:10:12 2006 -0300

    V4L/DVB (4411): Fix minor errors in build files
    
    In pwc Kconfig, change 'depends' to 'depends on'
    In dvb-core Makefile, change '=' to ':='
    
    Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 7fb6529762f8185f792ad74adcf211713f973c80
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Aug 8 15:47:31 2006 -0300

    V4L/DVB (4407): Driver dsbr100 is a radio device, not a video one!
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 0dfa9abd6bd153364da8cb131db0d0404ab15ad1
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Aug 8 09:10:10 2006 -0300

    V4L/DVB (4399): Fix a typo that caused some compat stuff to not work
    
    Config option typo:
    -#ifdef CONFIG_V4L1_COMPAT
    +#ifdef CONFIG_VIDEO_V4L1_COMPAT
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 3cdc0f263b9fe99f7dfcef49df7a148b458e9d11
Author: Luc Van Oostenryck <luc.vanoostenryck@looxix.net>
Date:   Tue Aug 8 09:10:10 2006 -0300

    V4L/DVB (4395): Restore compat_ioctl in pwc driver
    
    The compat_ioctl support of the pwc driver was dropped during the last update of the driver.
    I suppose it was by mistake. If yes here is the patch to restore the support.
    
    Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@looxix.net>
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 6054b68e84478e66e626bde33f93b5f0bf48c180
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Aug 8 09:10:06 2006 -0300

    V4L/DVB (4371b): Fix V4L1 dependencies at drivers under sound/oss and sound/pci
    
    TVMixer and FM801 Tea5757 are still using V4L1 API.
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 5bdd6cc3300be534f2a2cf826e3fb926b5695698
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Jul 25 16:17:54 2006 -0300

    V4L/DVB (4371a): Fix V4L1 dependencies on compat_ioctl32
    
    Compat32 should be able to handle V4L1 ioctls if the old API support were
    selected.
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 487206f26364a878f5f8cec9f92796d403f1d7c9
Author: Mauro Carvalho Chehab <mchehab@infradead.org>
Date:   Tue Aug 8 09:10:01 2006 -0300

    V4L/DVB (4340): Videodev.h should be included also when V4L1_COMPAT is selected.
    
    Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>

commit 1b2a720506ccf7c30baaeda5d990c29b31e21726
Author: Michael Chan <mchan@broadcom.com>
Date:   Mon Aug 7 21:46:02 2006 -0700

    [TG3]: Fix tx race condition
    
    Fix a subtle race condition between tg3_start_xmit() and tg3_tx()
    discovered by Herbert Xu <herbert@gondor.apana.org.au>:
    
    CPU0					CPU1
    tg3_start_xmit()
    	if (tx_ring_full) {
    		tx_lock
    					tg3_tx()
    						if (!netif_queue_stopped)
    		netif_stop_queue()
    		if (!tx_ring_full)
    						update_tx_ring
    			netif_wake_queue()
    		tx_unlock
    	}
    
    Even though tx_ring is updated before the if statement in tg3_tx() in
    program order, it can be re-ordered by the CPU as shown above.  This
    scenario can cause the tx queue to be stopped forever if tg3_tx() has
    just freed up the entire tx_ring.  The possibility of this happening
    should be very rare though.
    
    The following changes are made:
    
    1. Add memory barrier to fix the above race condition.
    
    2. Eliminate the private tx_lock altogether and rely solely on
    netif_tx_lock.  This eliminates one spinlock in tg3_start_xmit()
    when the ring is full.
    
    3. Because of 2, use netif_tx_lock in tg3_tx() before calling
    netif_wake_queue().
    
    4. Change TX_BUFFS_AVAIL to an inline function with a memory barrier.
    Herbert and David suggested using the memory barrier instead of
    volatile.
    
    5. Check for the full wake queue condition before getting
    netif_tx_lock in tg3_tx().  This reduces the number of unnecessary
    spinlocks when the tx ring is full in a steady-state condition.
    
    6. Update version to 3.65.
    
    Signed-off-by: Michael Chan <mchan@broadcom.com>
    Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit bd37a088596ccdb2b2dd3299e25e333bca7a9a34
Author: Wei Yongjun <yjwei@nanjing-fnst.com>
Date:   Mon Aug 7 21:04:15 2006 -0700

    [TCP]: SNMPv2 tcpOutSegs counter error
    
    Do not count retransmitted segments.
    
    Signed-off-by: Wei Yongjun <yjwei@nanjing-fnst.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 69d8c28c9578ce78b3dc1b9be36926d962282898
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Mon Aug 7 20:52:10 2006 -0700

    [PKTGEN]: Make sure skb->{nh,h} are initialized in fill_packet_ipv6() too.
    
    Mirror the bug fix from fill_packet_ipv4()
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit aaf580601ff244df82324fff380ed6740f27ef03
Author: Chen-Li Tien <cltien@gmail.com>
Date:   Mon Aug 7 20:49:07 2006 -0700

    [PKTGEN]: Fix oops when used with balance-tlb bonding
    
    Signed-off-by: Chen-Li Tien <cltien@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 8d1502de27c46b365b5c86e17d173083d3d6c9ac
Author: Kirill Korotaev <dev@sw.ru>
Date:   Mon Aug 7 20:44:22 2006 -0700

    [IPV4]: Limit rt cache size properly.
    
    From: Kirill Korotaev <dev@sw.ru>
    
    During OpenVZ stress testing we found that UDP traffic with random src
    can generate too much excessive rt hash growing leading finally to OOM
    and kernel panics.
    
    It was found that for 4GB i686 system (having 1048576 total pages and
      225280 normal zone pages) kernel allocates the following route hash:
    syslog: IP route cache hash table entries: 262144 (order: 8, 1048576
    bytes) => ip_rt_max_size = 4194304 entries, i.e.  max rt size is
    4194304 * 256b = 1Gb of RAM > normal_zone
    
    Attached the patch which removes HASH_HIGHMEM flag from
    alloc_large_system_hash() call.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 8b5cc5ef40c83c6ea4c90b203bb2c8b17edfa11b
Author: Stephen Hemminger <shemminger@osdl.org>
Date:   Mon Aug 7 20:09:20 2006 -0700

    [IPX]: Header length validation needed
    
    This patch will linearize and check there is enough data.
    It handles the pprop case as well as avoiding a whole audit of
    the routing code.
    
    Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit d14cc9a342a8004b0ecfe66f1f12120962b61d8c
Author: Christoph Hellwig <hch@lst.de>
Date:   Mon Aug 7 16:11:48 2006 -0700

    [TG3]: skb->dev assignment is done by netdev_alloc_skb
    
    All caller of netdev_alloc_skb need to assign skb->dev shortly
    afterwards.  Move it into common code.
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 7b2e497a06c0e93719fda88820e057b635e8fae2
Author: Christoph Hellwig <hch@lst.de>
Date:   Mon Aug 7 16:09:04 2006 -0700

    [NET]: Assign skb->dev in netdev_alloc_skb
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 766ea8cce007e699679109df4fa469b870ba4860
Author: Christoph Hellwig <hch@lst.de>
Date:   Mon Aug 7 15:49:53 2006 -0700

    [NET]: Fix alloc_skb comment typo
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit 9ee4e3365dd0dab4c1e02fe44dc08a223b826c72
Author: Sam Ravnborg <sam@mars.ravnborg.org>
Date:   Mon Aug 7 21:01:36 2006 +0200

    kbuild: external modules shall not check config consistency
    
    external modules needs include/linux/autoconf.h and include/config/auto.conf
    but skip the integrity test of these. Even with a newer Kconfig file we
    shall just proceed since external modules simply uses the kernel source and
    shall not attempt to modify it.
    Error out if a config fiel is missing since they are mandatory.
    
    Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

commit 58a2f7d85aaf4c41157f15c43a913b5c3c6b3adb
Author: Sam Ravnborg <sam@mars.ravnborg.org>
Date:   Mon Aug 7 20:58:28 2006 +0200

    kbuild: do not try to build content of initramfs
    
    When a file supplied via CONFIG_INITRAMFS pointed to a file
    for which kbuild had a rule to compile it (foo.c => foo.o)
    then kbuild would compile the file before adding the
    file to the initramfs.
    
    Teach make that files included in initramfs shall not be updated by adding
    an 'empty command'. (See "Using Empty Commands" in info make).
    
    Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

commit 65200c291c4ce397835d8232eaa0878b765a9bce
Author: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date:   Mon Aug 7 17:00:33 2006 +0200

    [S390] lost interrupt after chpid vary off/on cycle.
    
    I/O on a CCW device may stall if a channel path to that device is
    logicaly varied off/on. A user I/O interrupt can get misinterpreted
    as interrupt for an internal path verification operation due to a
    missing check and is therefore never reported to the device driver.
    
    Correct check for pending interruptions before starting path
    verification.
    
    Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
    Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

commit 4bc0c4f9e74e7a5d62ef4db923b377f2aa474a8f
Author: Cornelia Huck <cornelia.huck@de.ibm.com>
Date:   Mon Aug 7 17:00:30 2006 +0200

    [S390] retry after deferred condition code.
    
    Do a retry of read device characteristics / read configuration
    data when a deferred condition code 1 is encountered in
    ccw_device_wake_up().
    
    Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
    Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

commit 25a2001a285e187d99ea347693bedd73b3e0722a
Author: Heiko Carstens <heiko.carstens@de.ibm.com>
Date:   Mon Aug 7 17:00:28 2006 +0200

    [S390] tape class return value handling.
    
    Without this patch register_tape_dev() will always fail, but might
    return a value that is not an error number. This will lead to accesses
    to already freed memory areas...
    
    Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
    Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

commit dd2c609c59cd9daeb90d58f3d1bc3813e8987df1
Author: Pierre Ossman <drzeus@drzeus.cx>
Date:   Mon Aug 7 01:40:04 2006 +0200

    [MMC] Another stray 'io' reference
    
    Another misuse of the global 'io' variable instead of the local 'base'.
    
    Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit 916f3ac680fcc4c142e9acd46161f55533b11760
Author: Pierre Ossman <drzeus@drzeus.cx>
Date:   Sun Aug 6 22:22:23 2006 +0200

    [MMC] Fix base address configuration in wbsd
    
    There were some confusion about base I/O variables in the wbsd driver.
    Seems like things have been working on shear luck so far. The global 'io'
    variable (used when manually configuring the resources) was used instead of
    the local 'base' variable.
    
    Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit 574dc0abab6650c5371df15ac708e48fa25bed89
Author: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date:   Sun Aug 6 20:55:33 2006 +0100

    [ARM] Fix Acorn platform SCSI driver build failures
    
    SCSI folk forgot to fix up all the uses of 'buffer' before deleting
    this struct member.  Do it for them to rescue the resulting build
    failures.
    
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit ca8c100a00e91cf93782008441a2e159c4b49a25
Author: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date:   Sun Aug 6 20:53:40 2006 +0100

    [ARM] Fix NCR5380-based SCSI card build
    
    The NCR5380-based SCSI cards need the SCSI SPI transport selected
    to build correctly.
    
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit 8832157bff28e6559fd38fc18433f8bd8803340a
Author: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date:   Sun Aug 6 11:00:45 2006 +0100

    [ARM] Fix pci export warnings
    
    Remove duplicate PCI exports from ixp4xx machine class.
    
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit 4c756f4e954186630f6ee91a4da9a4b39fe3618d
Author: Martin Michlmayr <tbm@cyrius.com>
Date:   Sun Aug 6 09:59:26 2006 +0100

    [ARM] 3747/1: Fix compilation error in mach-ixp4xx/gtwx5715-setup.c
    
    Patch from Martin Michlmayr
    
    Fix the following compilation error in arm/mach-ixp4xx/gtwx5715-setup:
    
      CC      arch/arm/mach-ixp4xx/gtwx5715-setup.o
    arch/arm/mach-ixp4xx/gtwx5715-setup.c:110: error: expected identifier or '(' before �=� token
    arch/arm/mach-ixp4xx/gtwx5715-setup.c:121: error: 'gtwx5715_flash_resource' undeclared here (not in a function)
    arch/arm/mach-ixp4xx/gtwx5715-setup.c: In function 'gtwx5715_init':
    arch/arm/mach-ixp4xx/gtwx5715-setup.c:133: error: 'flash_resource' undeclared (first use in this function)
    arch/arm/mach-ixp4xx/gtwx5715-setup.c:133: error: (Each undeclared identifier is reported only once
    arch/arm/mach-ixp4xx/gtwx5715-setup.c:133: error: for each function it appears in.)
    make[1]: *** [arch/arm/mach-ixp4xx/gtwx5715-setup.o] Error 1
    
    Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit b39239330858bc99b4d529923ec58e95ff6103f8
Author: George G. Davis <davis_g@mvista.com>
Date:   Sun Aug 6 09:59:25 2006 +0100

    [ARM] 3745/1: Add EXPORT_SYMBOL(rtc_next_alarm_time) to ARM rtctime.c
    
    Patch from George G. Davis
    
    Fix "WARNING: "rtc_next_alarm_time" [drivers/rtc/rtc-sa1100.ko]
    undefined!"
    
    Signed-off-by: George G. Davis <gdavis@mvista.com>
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit c723e08af18e60cda3bbc90e0d6fb688587aeb9f
Author: Juha [��l� <juha.yrjola@solidboot.com>
Date:   Sun Aug 6 09:58:22 2006 +0100

    [ARM] 3744/1: MMC: mmcqd gets stuck when block queue is plugged
    
    Patch from Juha [��l�
    
    When the block queue is plugged, mq->req must be set to NULL.
    Otherwise mq->req might be left non-NULL, even though mmcqd is
    not processing a request, thus preventing the MMC queue thread from
    being woken up when new requests do arrive.
    
    Signed-off-by: Juha Yrjola <juha.yrjola@solidboot.com>
    Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

commit 77f3f879387cc5fce6c7a4ca7777c586fe6291cd
Author: Tejun Heo <htejun@gmail.com>
Date:   Sat Aug 5 03:59:19 2006 +0900

    [PATCH] libata: fix ata_device_add() error path
    
    In the error path, ata_device_add()
    
    * dereferences null host_set->ports[] element.
    * calls scsi_remove_host() on not-yet-added shost.
    
    This patch fixes both bugs.  The first problem was spotted and initial
    patch submitted by Dave Jones <davej@redhat.com>.  The second problem
    was mentioned and fixed by Jeff Garzik <jgarzik@pobox.com> in a larger
    cleanup patch.
    
    Cc: Dave Jones <davej@redhat.com>
    Cc: Jeff Garzik <jgarzik@pobox.com>
    Signed-off-by: Tejun Heo <htejun@gmail.com>

commit 6543bc0777fadf129d8ee7ac82e0090fb0480403
Author: Jeff Garzik <jeff@garzik.org>
Date:   Sat Aug 5 03:59:17 2006 +0900

    [PATCH] [libata] manually inline ata_host_remove()
    
    (tj: this is for the following ata_device_add() fix)
    
    Signed-off-by: Jeff Garzik <jeff@garzik.org>
    Signed-off-by: Tejun Heo <htejun@gmail.com>

commit f31e945c50e47b7163c6018928aa93b66e8f22f0
Author: Tejun Heo <htejun@gmail.com>
Date:   Sat Aug 5 03:59:15 2006 +0900

    [PATCH] sata_sil24: don't set probe_ent->mmio_base
    
    sata_sil24 doesn't make use of probe_ent->mmio_base and setting this
    field causes the area to be released twice on detach.  Don't set
    probe_ent->mmio_base.
    
    Signed-off-by: Tejun Heo <htejun@gmail.com>

commit f814b75f4ed2ed58cd7bc876a0a9406f984aac6d
Author: Tejun Heo <htejun@gmail.com>
Date:   Sat Aug 5 03:59:13 2006 +0900

    [PATCH] ata_piix: fix host_set private_data intialization
    
    To get host_set->private_data initialized reliably, all pinfos need to
    point to the same hpriv.  Restore pinfo->private_data after pata pinfo
    assignment.
    
    Signed-off-by: Tejun Heo <htejun@gmail.com>

commit c3cf30a989efec8144225517e0b2f82c955e3966
Author: Tejun Heo <htejun@gmail.com>
Date:   Sat Aug 5 03:59:11 2006 +0900

    [PATCH] libata: fix ata_port_detach() for old EH ports
    
    ata_prot_detach() did nothing for old EH ports and thus SCSI hosts
    associated with those ports are left dangling after they are detached
    leaving stale devices and causing oops eventually.  Make
    ata_port_detach() remove SCSI hosts for old EH ports.
    
    Signed-off-by: Tejun Heo <htejun@gmail.com>

commit 782a6675119c76c071e74e2ddd98268f47770cba
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Thu Aug 3 23:54:41 2006 +1000

    [PATCH] Send wireless netlink events with a clean slate
    
    Drivers expect to be able to call wireless_send_event in arbitrary
    contexts.  On the other hand, netlink really doesn't like being
    invoked in an IRQ context.  So we need to postpone the sending of
    netlink skb's to a tasklet.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: John W. Linville <linville@tuxdriver.com>

commit 8bcb2839b74d605f5549962a6e69dc07768e95b6
Author: Dave Kleikamp <shaggy@austin.ibm.com>
Date:   Fri Jul 28 08:46:05 2006 -0500

    JFS: Fix bug in quota code.  tmp_bh.b_size must be initialized
    
    Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>

commit 115ff50bade0f93a288677745a5884def6cbf9b1
Author: Dave Kleikamp <shaggy@austin.ibm.com>
Date:   Wed Jul 26 14:52:13 2006 -0500

    JFS: Quota support broken, no quota_read and quota_write
    
    jfs_quota_read/write are very near duplicates of ext2_quota_read/write.
    
    Cleaned up jfs_get_block as long as I had to change it to be non-static.
    
    Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 CREDITS                                                   |    7 
 Documentation/00-INDEX                                    |    2 
 Documentation/netlabel/00-INDEX                           |   10 
 Documentation/netlabel/cipso_ipv4.txt                     |   48 
 Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt |  791 ++++
 Documentation/netlabel/introduction.txt                   |   46 
 Documentation/netlabel/lsm_interface.txt                  |   47 
 Documentation/networking/ip-sysctl.txt                    |   35 
 Documentation/networking/secid.txt                        |   14 
 Makefile                                                  |   24 
 drivers/atm/he.c                                          |    2 
 drivers/net/3c59x.c                                       |    2 
 drivers/net/8139cp.c                                      |    6 
 drivers/net/acenic.c                                      |    8 
 drivers/net/arcnet/com20020-pci.c                         |    1 
 drivers/net/bnx2.c                                        |    2 
 drivers/net/cassini.c                                     |    4 
 drivers/net/chelsio/sge.c                                 |   10 
 drivers/net/dl2k.c                                        |    2 
 drivers/net/e1000/e1000_main.c                            |    8 
 drivers/net/forcedeth.c                                   |    3 
 drivers/net/gianfar.c                                     |    2 
 drivers/net/hamachi.c                                     |    2 
 drivers/net/ibm_emac/ibm_emac_core.c                      |    2 
 drivers/net/ioc3-eth.c                                    |    2 
 drivers/net/irda/ali-ircc.c                               |    8 
 drivers/net/irda/irport.c                                 |    4 
 drivers/net/irda/via-ircc.c                               |    5 
 drivers/net/irda/w83977af_ir.c                            |    4 
 drivers/net/ixgb/ixgb_main.c                              |    2 
 drivers/net/mv643xx_eth.c                                 |    2 
 drivers/net/myri10ge/myri10ge.c                           |    8 
 drivers/net/ns83820.c                                     |    2 
 drivers/net/r8169.c                                       |    2 
 drivers/net/s2io.c                                        |    2 
 drivers/net/sk98lin/skge.c                                |    6 
 drivers/net/skge.c                                        |    4 
 drivers/net/sky2.c                                        |    6 
 drivers/net/starfire.c                                    |    6 
 drivers/net/sungem.c                                      |    4 
 drivers/net/sunhme.c                                      |    6 
 drivers/net/tg3.c                                         |   83 
 drivers/net/tg3.h                                         |    8 
 drivers/net/typhoon.c                                     |    2 
 drivers/net/via-rhine.c                                   |    2 
 drivers/net/via-velocity.c                                |    2 
 include/linux/fib_rules.h                                 |   64 
 include/linux/if.h                                        |  129 
 include/linux/if_addr.h                                   |   53 
 include/linux/ip.h                                        |    1 
 include/linux/neighbour.h                                 |  159 
 include/linux/net.h                                       |   28 
 include/linux/netdevice.h                                 |    4 
 include/linux/netfilter.h                                 |    6 
 include/linux/netfilter_ipv4/ip_nat.h                     |    4 
 include/linux/netfilter_ipv4/ip_nat_core.h                |    8 
 include/linux/rtnetlink.h                                 |  376 --
 include/linux/security.h                                  |  239 +
 include/linux/skbuff.h                                    |   21 
 include/linux/snmp.h                                      |    2 
 include/linux/sysctl.h                                    |    4 
 include/net/cipso_ipv4.h                                  |  250 +
 include/net/dn_fib.h                                      |   18 
 include/net/fib_rules.h                                   |   97 
 include/net/flow.h                                        |    5 
 include/net/genetlink.h                                   |    5 
 include/net/inet_hashtables.h                             |   48 
 include/net/inet_sock.h                                   |    6 
 include/net/ip6_fib.h                                     |   73 
 include/net/ip6_route.h                                   |   15 
 include/net/ip_fib.h                                      |  102 
 include/net/neighbour.h                                   |   41 
 include/net/netlabel.h                                    |  291 +
 include/net/netlink.h                                     |  107 
 include/net/nexthop.h                                     |   33 
 include/net/request_sock.h                                |    1 
 include/net/route.h                                       |    3 
 include/net/sock.h                                        |   14 
 include/net/xfrm.h                                        |    2 
 kernel/taskstats.c                                        |    2 
 net/Kconfig                                               |    5 
 net/Makefile                                              |    1 
 net/atm/atm_sysfs.c                                       |    1 
 net/bridge/br_netlink.c                                   |   31 
 net/bridge/netfilter/ebtables.c                           |   21 
 net/core/Makefile                                         |    1 
 net/core/datagram.c                                       |    4 
 net/core/dev.c                                            |   19 
 net/core/dev_mcast.c                                      |    3 
 net/core/dst.c                                            |    3 
 net/core/fib_rules.c                                      |  427 ++
 net/core/flow.c                                           |    7 
 net/core/neighbour.c                                      |  575 +--
 net/core/netpoll.c                                        |    2 
 net/core/pktgen.c                                         |    4 
 net/core/rtnetlink.c                                      |  551 +--
 net/core/skbuff.c                                         |   18 
 net/core/sock.c                                           |   12 
 net/core/wireless.c                                       |    4 
 net/dccp/ipv4.c                                           |   14 
 net/dccp/ipv6.c                                           |    9 
 net/decnet/Kconfig                                        |    1 
 net/decnet/af_decnet.c                                    |    1 
 net/decnet/dn_dev.c                                       |   29 
 net/decnet/dn_fib.c                                       |   76 
 net/decnet/dn_route.c                                     |    8 
 net/decnet/dn_rules.c                                     |  495 +--
 net/decnet/dn_table.c                                     |  165 -
 net/ipv4/Kconfig                                          |    1 
 net/ipv4/Makefile                                         |    1 
 net/ipv4/af_inet.c                                        |    6 
 net/ipv4/ah4.c                                            |    2 
 net/ipv4/cipso_ipv4.c                                     | 1607 ++++++++++
 net/ipv4/devinet.c                                        |  246 +
 net/ipv4/fib_frontend.c                                   |  474 ++
 net/ipv4/fib_hash.c                                       |  126 
 net/ipv4/fib_lookup.h                                     |   13 
 net/ipv4/fib_rules.c                                      |  617 +--
 net/ipv4/fib_semantics.c                                  |  518 +--
 net/ipv4/fib_trie.c                                       |  110 
 net/ipv4/icmp.c                                           |   16 
 net/ipv4/igmp.c                                           |    6 
 net/ipv4/inet_connection_sock.c                           |    3 
 net/ipv4/inet_hashtables.c                                |   33 
 net/ipv4/ip_fragment.c                                    |   12 
 net/ipv4/ip_gre.c                                         |    4 
 net/ipv4/ip_options.c                                     |   20 
 net/ipv4/ip_output.c                                      |   10 
 net/ipv4/ipconfig.c                                       |    1 
 net/ipv4/ipmr.c                                           |    7 
 net/ipv4/ipvs/ip_vs_proto_tcp.c                           |    8 
 net/ipv4/ipvs/ip_vs_proto_udp.c                           |    8 
 net/ipv4/netfilter.c                                      |    2 
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c              |    1 
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c               |    3 
 net/ipv4/netfilter/ip_conntrack_proto_udp.c               |    3 
 net/ipv4/netfilter/ip_conntrack_sip.c                     |    1 
 net/ipv4/netfilter/ip_nat_core.c                          |   52 
 net/ipv4/netfilter/ip_nat_helper.c                        |   59 
 net/ipv4/netfilter/ip_nat_proto_gre.c                     |    5 
 net/ipv4/netfilter/ip_nat_proto_icmp.c                    |    8 
 net/ipv4/netfilter/ip_nat_proto_tcp.c                     |    7 
 net/ipv4/netfilter/ip_nat_proto_udp.c                     |   15 
 net/ipv4/netfilter/ip_nat_standalone.c                    |    9 
 net/ipv4/netfilter/ip_queue.c                             |    6 
 net/ipv4/netfilter/ipt_ECN.c                              |   22 
 net/ipv4/netfilter/ipt_REJECT.c                           |    2 
 net/ipv4/netfilter/ipt_TCPMSS.c                           |   38 
 net/ipv4/netfilter/ipt_hashlimit.c                        |   11 
 net/ipv4/proc.c                                           |    2 
 net/ipv4/raw.c                                            |    4 
 net/ipv4/route.c                                          |  158 
 net/ipv4/syncookies.c                                     |    5 
 net/ipv4/sysctl_net_ipv4.c                                |   35 
 net/ipv4/tcp.c                                            |   10 
 net/ipv4/tcp_input.c                                      |   34 
 net/ipv4/tcp_ipv4.c                                       |   23 
 net/ipv4/tcp_lp.c                                         |    1 
 net/ipv4/tcp_minisocks.c                                  |    4 
 net/ipv4/tcp_output.c                                     |   42 
 net/ipv4/tcp_probe.c                                      |    3 
 net/ipv4/tcp_timer.c                                      |   16 
 net/ipv4/tcp_veno.c                                       |    1 
 net/ipv4/udp.c                                            |   23 
 net/ipv4/xfrm4_output.c                                   |    4 
 net/ipv6/Kconfig                                          |    7 
 net/ipv6/Makefile                                         |    1 
 net/ipv6/addrconf.c                                       |  100 
 net/ipv6/af_inet6.c                                       |    3 
 net/ipv6/datagram.c                                       |    2 
 net/ipv6/exthdrs.c                                        |    2 
 net/ipv6/fib6_rules.c                                     |  258 +
 net/ipv6/icmp.c                                           |    6 
 net/ipv6/inet6_connection_sock.c                          |    1 
 net/ipv6/ip6_fib.c                                        |  309 +
 net/ipv6/ip6_output.c                                     |    2 
 net/ipv6/mcast.c                                          |    2 
 net/ipv6/ndisc.c                                          |    2 
 net/ipv6/netfilter.c                                      |    2 
 net/ipv6/netfilter/ip6_queue.c                            |    6 
 net/ipv6/netfilter/ip6t_REJECT.c                          |    1 
 net/ipv6/netfilter/nf_conntrack_reasm.c                   |    6 
 net/ipv6/raw.c                                            |    3 
 net/ipv6/reassembly.c                                     |   14 
 net/ipv6/route.c                                          |  531 +--
 net/ipv6/tcp_ipv6.c                                       |   15 
 net/ipv6/udp.c                                            |   21 
 net/ipv6/xfrm6_output.c                                   |    4 
 net/ipx/af_ipx.c                                          |   10 
 net/key/af_key.c                                          |   37 
 net/llc/llc_sap.c                                         |    3 
 net/netfilter/core.c                                      |   24 
 net/netfilter/nf_conntrack_proto_tcp.c                    |    3 
 net/netfilter/nf_conntrack_proto_udp.c                    |    3 
 net/netfilter/nf_internals.h                              |    2 
 net/netfilter/nf_queue.c                                  |   80 
 net/netfilter/nfnetlink_queue.c                           |    6 
 net/netfilter/xt_string.c                                 |    2 
 net/netlabel/Kconfig                                      |   14 
 net/netlabel/Makefile                                     |   16 
 net/netlabel/netlabel_cipso_v4.c                          |  542 +++
 net/netlabel/netlabel_cipso_v4.h                          |  217 +
 net/netlabel/netlabel_domainhash.c                        |  513 +++
 net/netlabel/netlabel_domainhash.h                        |   63 
 net/netlabel/netlabel_kapi.c                              |  231 +
 net/netlabel/netlabel_mgmt.c                              |  624 +++
 net/netlabel/netlabel_mgmt.h                              |  246 +
 net/netlabel/netlabel_unlabeled.c                         |  253 +
 net/netlabel/netlabel_unlabeled.h                         |   98 
 net/netlabel/netlabel_user.c                              |  158 
 net/netlabel/netlabel_user.h                              |  214 +
 net/netlink/af_netlink.c                                  |   75 
 net/netlink/attr.c                                        |   75 
 net/netlink/genetlink.c                                   |    4 
 net/packet/af_packet.c                                    |    2 
 net/sched/act_api.c                                       |    7 
 net/sched/sch_htb.c                                       | 1371 +++-----
 net/sched/sch_netem.c                                     |    4 
 net/socket.c                                              | 1024 +++---
 net/sunrpc/socklib.c                                      |    2 
 net/sunrpc/svcsock.c                                      |   38 
 net/sunrpc/xprtsock.c                                     |   13 
 net/xfrm/xfrm_policy.c                                    |   36 
 net/xfrm/xfrm_state.c                                     |   14 
 net/xfrm/xfrm_user.c                                      |   58 
 security/dummy.c                                          |   64 
 security/selinux/hooks.c                                  |  239 +
 security/selinux/include/av_perm_to_string.h              |    1 
 security/selinux/include/av_permissions.h                 |    1 
 security/selinux/include/objsec.h                         |    9 
 security/selinux/include/security.h                       |    2 
 security/selinux/include/selinux_netlabel.h               |  125 
 security/selinux/include/xfrm.h                           |   43 
 security/selinux/ss/ebitmap.c                             |  144 
 security/selinux/ss/ebitmap.h                             |    6 
 security/selinux/ss/mls.c                                 |  176 -
 security/selinux/ss/mls.h                                 |   41 
 security/selinux/ss/services.c                            |  557 +++
 security/selinux/xfrm.c                                   |  216 +
 usr/Makefile                                              |    3 
 240 files changed, 14499 insertions(+), 4912 deletions(-)

diff -puN CREDITS~git-net CREDITS
--- a/CREDITS~git-net
+++ a/CREDITS
@@ -2384,6 +2384,13 @@ N: Thomas Molina
 E: tmolina@cablespeed.com
 D: bug fixes, documentation, minor hackery
 
+N: Paul Moore
+E: paul.moore@hp.com
+D: NetLabel author
+S: Hewlett-Packard
+S: 110 Spit Brook Road
+S: Nashua, NH 03062
+
 N: James Morris
 E: jmorris@namei.org
 W: http://namei.org/
diff -puN Documentation/00-INDEX~git-net Documentation/00-INDEX
--- a/Documentation/00-INDEX~git-net
+++ a/Documentation/00-INDEX
@@ -184,6 +184,8 @@ mtrr.txt
 	- how to use PPro Memory Type Range Registers to increase performance.
 nbd.txt
 	- info on a TCP implementation of a network block device.
+netlabel/
+	- directory with information on the NetLabel subsystem.
 networking/
 	- directory with info on various aspects of networking with Linux.
 nfsroot.txt
diff -puN /dev/null Documentation/netlabel/00-INDEX
--- /dev/null
+++ a/Documentation/netlabel/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+	- this file.
+cipso_ipv4.txt
+	- documentation on the IPv4 CIPSO protocol engine.
+draft-ietf-cipso-ipsecurity-01.txt
+	- IETF draft of the CIPSO protocol, dated 16 July 1992.
+introduction.txt
+	- NetLabel introduction, READ THIS FIRST.
+lsm_interface.txt
+	- documentation on the NetLabel kernel security module API.
diff -puN /dev/null Documentation/netlabel/cipso_ipv4.txt
--- /dev/null
+++ a/Documentation/netlabel/cipso_ipv4.txt
@@ -0,0 +1,48 @@
+NetLabel CIPSO/IPv4 Protocol Engine
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP
+Security Option (CIPSO) draft from July 16, 1992.  A copy of this draft can be
+found in this directory, consult '00-INDEX' for the filename.  While the IETF
+draft never made it to an RFC standard it has become a de-facto standard for
+labeled networking and is used in many trusted operating systems.
+
+ * Outbound Packet Processing
+
+The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
+adding the CIPSO label to the socket.  This causes all packets leaving the
+system through the socket to have the CIPSO IP option applied.  The socket's
+CIPSO label can be changed at any point in time, however, it is recommended
+that it is set upon the socket's creation.  The LSM can set the socket's CIPSO
+label by using the NetLabel security module API; if the NetLabel "domain" is
+configured to use CIPSO for packet labeling then a CIPSO IP option will be
+generated and attached to the socket.
+
+ * Inbound Packet Processing
+
+The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
+IP layer without any special handling required by the LSM.  However, in order
+to decode and translate the CIPSO label on the packet the LSM must use the
+NetLabel security module API to extract the security attributes of the packet.
+This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
+LSM hook.
+
+ * Label Translation
+
+The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
+attributes such as sensitivity level and category to values which are
+appropriate for the host.  These mappings are defined as part of a CIPSO
+Domain Of Interpretation (DOI) definition and are configured through the
+NetLabel user space communication layer.  Each DOI definition can have a
+different security attribute mapping table.
+
+ * Label Translation Cache
+
+The NetLabel system provides a framework for caching security attribute
+mappings from the network labels to the corresponding LSM identifiers.  The
+CIPSO/IPv4 protocol engine supports this caching mechanism.
diff -puN /dev/null Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
--- /dev/null
+++ a/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
@@ -0,0 +1,791 @@
+IETF CIPSO Working Group
+16 July, 1992
+
+
+
+                 COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
+
+
+
+1.    Status
+
+This Internet Draft provides the high level specification for a Commercial
+IP Security Option (CIPSO).  This draft reflects the version as approved by
+the CIPSO IETF Working Group.  Distribution of this memo is unlimited.
+
+This document is an Internet Draft.  Internet Drafts are working documents
+of the Internet Engineering Task Force (IETF), its Areas, and its Working
+Groups. Note that other groups may also distribute working documents as
+Internet Drafts.
+
+Internet Drafts are draft documents valid for a maximum of six months.
+Internet Drafts may be updated, replaced, or obsoleted by other documents
+at any time.  It is not appropriate to use Internet Drafts as reference
+material or to cite them other than as a "working draft" or "work in
+progress."
+
+Please check the I-D abstract listing contained in each Internet Draft
+directory to learn the current status of this or any other Internet Draft.
+
+
+
+
+2.    Background
+
+Currently the Internet Protocol includes two security options.  One of
+these options is the DoD Basic Security Option (BSO) (Type 130) which allows
+IP datagrams to be labeled with security classifications.  This option
+provides sixteen security classifications and a variable number of handling
+restrictions.  To handle additional security information, such as security
+categories or compartments, another security option (Type 133) exists and
+is referred to as the DoD Extended Security Option (ESO).  The values for
+the fixed fields within these two options are administered by the Defense
+Information Systems Agency (DISA).
+
+Computer vendors are now building commercial operating systems with
+mandatory access controls and multi-level security.  These systems are
+no longer built specifically for a particular group in the defense or
+intelligence communities.  They are generally available commercial systems
+for use in a variety of government and civil sector environments.
+
+The small number of ESO format codes can not support all the possible
+applications of a commercial security option.  The BSO and ESO were
+designed to only support the United States DoD.  CIPSO has been designed
+to support multiple security policies.  This Internet Draft provides the
+format and procedures required to support a Mandatory Access Control
+security policy.  Support for additional security policies shall be
+defined in future RFCs.
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 1]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+3.    CIPSO Format
+
+Option type: 134 (Class 0, Number 6, Copy on Fragmentation)
+Option length: Variable
+
+This option permits security related information to be passed between
+systems within a single Domain of Interpretation (DOI).  A DOI is a
+collection of systems which agree on the meaning of particular values
+in the security option.  An authority that has been assigned a DOI
+identifier will define a mapping between appropriate CIPSO field values
+and their human readable equivalent.  This authority will distribute that
+mapping to hosts within the authority's domain.  These mappings may be
+sensitive, therefore a DOI authority is not required to make these
+mappings available to anyone other than the systems that are included in
+the DOI.
+
+This option MUST be copied on fragmentation.  This option appears at most
+once in a datagram.  All multi-octet fields in the option are defined to be
+transmitted in network byte order.  The format of this option is as follows:
+
++----------+----------+------//------+-----------//---------+
+| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT |
++----------+----------+------//------+-----------//---------+
+
+  TYPE=134    OPTION    DOMAIN OF               TAGS
+              LENGTH    INTERPRETATION
+
+
+                Figure 1. CIPSO Format
+
+
+3.1    Type
+
+This field is 1 octet in length.  Its value is 134.
+
+
+3.2    Length
+
+This field is 1 octet in length.  It is the total length of the option
+including the type and length fields.  With the current IP header length
+restriction of 40 octets the value of this field MUST not exceed 40.
+
+
+3.3    Domain of Interpretation Identifier
+
+This field is an unsigned 32 bit integer.  The value 0 is reserved and MUST
+not appear as the DOI identifier in any CIPSO option.  Implementations
+should assume that the DOI identifier field is not aligned on any particular
+byte boundary.
+
+To conserve space in the protocol, security levels and categories are
+represented by numbers rather than their ASCII equivalent.  This requires
+a mapping table within CIPSO hosts to map these numbers to their
+corresponding ASCII representations.  Non-related groups of systems may
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 2]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+have their own unique mappings.  For example, one group of systems may
+use the number 5 to represent Unclassified while another group may use the
+number 1 to represent that same security level.  The DOI identifier is used
+to identify which mapping was used for the values within the option.
+
+
+3.4    Tag Types
+
+A common format for passing security related information is necessary
+for interoperability.  CIPSO uses sets of "tags" to contain the security
+information relevant to the data in the IP packet.  Each tag begins with
+a tag type identifier followed by the length of the tag and ends with the
+actual security information to be passed.  All multi-octet fields in a tag
+are defined to be transmitted in network byte order.  Like the DOI
+identifier field in the CIPSO header, implementations should assume that
+all tags, as well as fields within a tag, are not aligned on any particular
+octet boundary.   The tag types defined in this document contain alignment
+bytes to assist alignment of some information, however alignment can not
+be guaranteed if CIPSO is not the first IP option.
+
+CIPSO tag types 0 through 127 are reserved for defining standard tag
+formats.  Their definitions will be published in RFCs.  Tag types whose
+identifiers are greater than 127 are defined by the DOI authority and may
+only be meaningful in certain Domains of Interpretation.  For these tag
+types, implementations will require the DOI identifier as well as the tag
+number to determine the security policy and the format associated with the
+tag.  Use of tag types above 127 are restricted to closed networks where
+interoperability with other networks will not be an issue.  Implementations
+that support a tag type greater than 127 MUST support at least one DOI that
+requires only tag types 1 to 127.
+
+Tag type 0 is reserved. Tag types 1, 2, and 5 are defined in this
+Internet Draft.  Types 3 and 4 are reserved for work in progress.
+The standard format for all current and future CIPSO tags is shown below:
+
++----------+----------+--------//--------+
+| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII |
++----------+----------+--------//--------+
+    TAG       TAG         TAG
+    TYPE      LENGTH      INFORMATION
+
+    Figure 2:  Standard Tag Format
+
+In the three tag types described in this document, the length and count
+restrictions are based on the current IP limitation of 40 octets for all
+IP options.  If the IP header is later expanded, then the length and count
+restrictions specified in this document may increase to use the full area
+provided for IP options.
+
+
+3.4.1    Tag Type Classes
+
+Tag classes consist of tag types that have common processing requirements
+and support the same security policy.  The three tags defined in this
+Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 3]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+class and support the MAC Sensitivity security policy.
+
+
+3.4.2    Tag Type 1
+
+This is referred to as the "bit-mapped" tag type.  Tag type 1 is included
+in the MAC Sensitivity tag type class.  The format of this tag type is as
+follows:
+
++----------+----------+----------+----------+--------//---------+
+| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+--------//---------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY    BIT MAP OF
+    TYPE      LENGTH   OCTET      LEVEL          CATEGORIES
+
+            Figure 3. Tag Type 1 Format
+
+
+3.4.2.1    Tag Type
+
+This field is 1 octet in length and has a value of 1.
+
+
+3.4.2.2    Tag Length
+
+This field is 1 octet in length.  It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.2.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category bitmap field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+3.4.2.4    Sensitivity Level
+
+This field is 1 octet in length.  Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.2.5    Bit Map of Categories
+
+The length of this field is variable and ranges from 0 to 30 octets.  This
+provides representation of categories 0 to 239.  The ordering of the bits
+is left to right or MSB to LSB.  For example category 0 is represented by
+the most significant bit of the first byte and category 15 is represented
+by the least significant bit of the second byte.  Figure 4 graphically
+shows this ordering.  Bit N is binary 1 if category N is part of the label
+for the datagram, and bit N is binary 0 if category N is not part of the
+label.  Except for the optimized tag 1 format described in the next section,
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 4]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+minimal encoding SHOULD be used resulting in no trailing zero octets in the
+category bitmap.
+
+        octet 0  octet 1  octet 2  octet 3  octet 4  octet 5
+        XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . .
+bit     01234567 89111111 11112222 22222233 33333333 44444444
+number             012345 67890123 45678901 23456789 01234567
+
+            Figure 4. Ordering of Bits in Tag 1 Bit Map
+
+
+3.4.2.6    Optimized Tag 1 Format
+
+Routers work most efficiently when processing fixed length fields.  To
+support these routers there is an optimized form of tag type 1.  The format
+does not change.  The only change is to the category bitmap which is set to
+a constant length of 10 octets.  Trailing octets required to fill out the 10
+octets are zero filled.  Ten octets, allowing for 80 categories, was chosen
+because it makes the total length of the CIPSO option 20 octets.  If CIPSO
+is the only option then the option will be full word aligned and additional
+filler octets will not be required.
+
+
+3.4.3    Tag Type 2
+
+This is referred to as the "enumerated" tag type.  It is used to describe
+large but sparsely populated sets of categories.  Tag type 2 is in the MAC
+Sensitivity tag type class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+-------------//-------------+
+| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+-------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY         ENUMERATED
+    TYPE      LENGTH   OCTET      LEVEL               CATEGORIES
+
+                Figure 5. Tag Type 2 Format
+
+
+3.4.3.1     Tag Type
+
+This field is one octet in length and has a value of 2.
+
+
+3.4.3.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.3.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category field on an even octet boundary.  This will
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 5]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+speed many implementations including router implementations.
+
+
+3.4.3.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the
+maximum value.
+
+
+3.4.3.5    Enumerated Categories
+
+In this tag, categories are represented by their actual value rather than
+by their position within a bit field.  The length of each category is 2
+octets.  Up to 15 categories may be represented by this tag.  Valid values
+for categories are 0 to 65534.  Category 65535 is not a valid category
+value.  The categories MUST be listed in ascending order within the tag.
+
+
+3.4.4    Tag Type 5
+
+This is referred to as the "range" tag type.  It is used to represent
+labels where all categories in a range, or set of ranges, are included
+in the sensitivity label.  Tag type 5 is in the MAC Sensitivity tag type
+class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+------------//-------------+
+| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL |  Top/Bottom | Top/Bottom  |
++----------+----------+----------+----------+------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY        CATEGORY RANGES
+    TYPE      LENGTH   OCTET      LEVEL
+
+                     Figure 6. Tag Type 5 Format
+
+
+3.4.4.1     Tag Type
+
+This field is one octet in length and has a value of 5.
+
+
+3.4.4.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.4.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category range field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 6]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+3.4.4.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.4.5    Category Ranges
+
+A category range is a 4 octet field comprised of the 2 octet index of the
+highest numbered category followed by the 2 octet index of the lowest
+numbered category.  These range endpoints are inclusive within the range of
+categories.  All categories within a range are included in the sensitivity
+label.  This tag may contain a maximum of 7 category pairs.  The bottom
+category endpoint for the last pair in the tag MAY be omitted and SHOULD be
+assumed to be 0.  The ranges MUST be non-overlapping and be listed in
+descending order.  Valid values for categories are 0 to 65534.  Category
+65535 is not a valid category value.
+
+
+3.4.5     Minimum Requirements
+
+A CIPSO implementation MUST be capable of generating at least tag type 1 in
+the non-optimized form.  In addition, a CIPSO implementation MUST be able
+to receive any valid tag type 1 even those using the optimized tag type 1
+format.
+
+
+4.    Configuration Parameters
+
+The configuration parameters defined below are required for all CIPSO hosts,
+gateways, and routers that support multiple sensitivity labels.  A CIPSO
+host is defined to be the origination or destination system for an IP
+datagram.  A CIPSO gateway provides IP routing services between two or more
+IP networks and may be required to perform label translations between
+networks.  A CIPSO gateway may be an enhanced CIPSO host or it may just
+provide gateway services with no end system CIPSO capabilities.  A CIPSO
+router is a dedicated IP router that routes IP datagrams between two or more
+IP networks.
+
+An implementation of CIPSO on a host MUST have the capability to reject a
+datagram for reasons that the information contained can not be adequately
+protected by the receiving host or if acceptance may result in violation of
+the host or network security policy.  In addition, a CIPSO gateway or router
+MUST be able to reject datagrams going to networks that can not provide
+adequate protection or may violate the network's security policy.  To
+provide this capability the following minimal set of configuration
+parameters are required for CIPSO implementations:
+
+HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label
+greater than this maximum MUST be rejected by the CIPSO host.  This
+parameter does not apply to CIPSO gateways or routers.  This parameter need
+not be defined explicitly as it can be implicitly derived from the
+PORT_LABEL_MAX parameters for the associated interfaces.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 7]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label less
+than this minimum MUST be rejected by the CIPSO host.  This parameter does
+not apply to CIPSO gateways or routers.  This parameter need not be defined
+explicitly as it can be implicitly derived from the PORT_LABEL_MIN
+parameters for the associated interfaces.
+
+PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label greater than this maximum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+less than or equal to the label within the HOST_LABEL_MAX parameter.  This
+parameter does not apply to CIPSO hosts that support only one network port.
+
+PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label less than this minimum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+greater than or equal to the label within the HOST_LABEL_MIN parameter.
+This parameter does not apply to CIPSO hosts that support only one network
+port.
+
+PORT_DOI - This parameter is used to assign a DOI identifier value to a
+particular network interface port.  All CIPSO labels within datagrams
+going out this port MUST use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the NET_DOI
+parameter, or the HOST_DOI parameter.
+
+NET_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP network address.  All CIPSO labels within datagrams destined
+for the particular IP network MUST use the specified DOI identifier.  All
+CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the HOST_DOI parameter.
+
+HOST_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP host address.  All CIPSO labels within datagrams destined for
+the particular IP host will use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the NET_DOI parameter.
+
+This list represents the minimal set of configuration parameters required
+to be compliant.  Implementors are encouraged to add to this list to
+provide enhanced functionality and control.  For example, many security
+policies may require both incoming and outgoing datagrams be checked against
+the port and host label ranges.
+
+
+4.1    Port Range Parameters
+
+The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters
+MAY be in CIPSO or local format.  Some CIPSO systems, such as routers, may
+want to have the range parameters expressed in CIPSO format so that incoming
+labels do not have to be converted to a local format before being compared
+against the range.  If multiple DOIs are supported by one of these CIPSO
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 8]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+systems then multiple port range parameters would be needed, one set for
+each DOI supported on a particular port.
+
+The port range will usually represent the total set of labels that may
+exist on the logical network accessed through the corresponding network
+interface.  It may, however, represent a subset of these labels that are
+allowed to enter the CIPSO system.
+
+
+4.2    Single Label CIPSO Hosts
+
+CIPSO implementations that support only one label are not required to
+support the parameters described above.  These limited implementations are
+only required to support a NET_LABEL parameter.  This parameter contains
+the CIPSO label that may be inserted in datagrams that exit the host.  In
+addition, the host MUST reject any incoming datagram that has a label which
+is not equivalent to the NET_LABEL parameter.
+
+
+5.    Handling Procedures
+
+This section describes the processing requirements for incoming and
+outgoing IP datagrams.  Just providing the correct CIPSO label format
+is not enough.  Assumptions will be made by one system on how a
+receiving system will handle the CIPSO label.  Wrong assumptions may
+lead to non-interoperability or even a security incident.  The
+requirements described below represent the minimal set needed for
+interoperability and that provide users some level of confidence.
+Many other requirements could be added to increase user confidence,
+however at the risk of restricting creativity and limiting vendor
+participation.
+
+
+5.1    Input Procedures
+
+All datagrams received through a network port MUST have a security label
+associated with them, either contained in the datagram or assigned to the
+receiving port.  Without this label the host, gateway, or router will not
+have the information it needs to make security decisions.  This security
+label will be obtained from the CIPSO if the option is present in the
+datagram.  See section 4.1.2 for handling procedures for unlabeled
+datagrams.  This label will be compared against the PORT (if appropriate)
+and HOST configuration parameters defined in section 3.
+
+If any field within the CIPSO option, such as the DOI identifier, is not
+recognized the IP datagram is discarded and an ICMP "parameter problem"
+(type 12) is generated and returned.  The ICMP code field is set to "bad
+parameter" (code 0) and the pointer is set to the start of the CIPSO field
+that is unrecognized.
+
+If the contents of the CIPSO are valid but the security label is
+outside of the configured host or port label range, the datagram is
+discarded and an ICMP "destination unreachable" (type 3) is generated
+and returned.  The code field of the ICMP is set to "communication with
+destination network administratively prohibited" (code 9) or to
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 9]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+"communication with destination host administratively prohibited"
+(code 10).  The value of the code field used is dependent upon whether
+the originator of the ICMP message is acting as a CIPSO host or a CIPSO
+gateway.  The recipient of the ICMP message MUST be able to handle either
+value.  The same procedure is performed if a CIPSO can not be added to an
+IP packet because it is too large to fit in the IP options area.
+
+If the error is triggered by receipt of an ICMP message, the message
+is discarded and no response is permitted (consistent with general ICMP
+processing rules).
+
+
+5.1.1    Unrecognized tag types
+
+The default condition for any CIPSO implementation is that an
+unrecognized tag type MUST be treated as a "parameter problem" and
+handled as described in section 4.1.  A CIPSO implementation MAY allow
+the system administrator to identify tag types that may safely be
+ignored.  This capability is an allowable enhancement, not a
+requirement.
+
+
+5.1.2    Unlabeled Packets
+
+A network port may be configured to not require a CIPSO label for all
+incoming  datagrams.  For this configuration a CIPSO label must be
+assigned to that network port and associated with all unlabeled IP
+datagrams.  This capability might be used for single level networks or
+networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts
+all operate at the same label.
+
+If a CIPSO option is required and none is found, the datagram is
+discarded and an ICMP "parameter problem" (type 12) is generated and
+returned to the originator of the datagram.  The code field of the ICMP
+is set to "option missing" (code 1) and the ICMP pointer is set to 134
+(the value of the option type for the missing CIPSO option).
+
+
+5.2    Output Procedures
+
+A CIPSO option MUST appear only once in a datagram.  Only one tag type
+from the MAC Sensitivity class MAY be included in a CIPSO option.  Given
+the current set of defined tag types, this means that CIPSO labels at
+first will contain only one tag.
+
+All datagrams leaving a CIPSO system MUST meet the following condition:
+
+        PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX
+
+If this condition is not satisfied the datagram MUST be discarded.
+If the CIPSO system only supports one port, the HOST_LABEL_MIN and the
+HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in
+the above condition.
+
+The DOI identifier to be used for all outgoing datagrams is configured by
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 10]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+the administrator.  If port level DOI identifier assignment is used, then
+the PORT_DOI configuration parameter MUST contain the DOI identifier to
+use.  If network level DOI assignment is used, then the NET_DOI parameter
+MUST contain the DOI identifier to use.  And if host level DOI assignment
+is employed, then the HOST_DOI parameter MUST contain the DOI identifier
+to use.  A CIPSO implementation need only support one level of DOI
+assignment.
+
+
+5.3    DOI Processing Requirements
+
+A CIPSO implementation MUST support at least one DOI and SHOULD support
+multiple DOIs.  System and network administrators are cautioned to
+ensure that at least one DOI is common within an IP network to allow for
+broadcasting of IP datagrams.
+
+CIPSO gateways MUST be capable of translating a CIPSO option from one
+DOI to another when forwarding datagrams between networks.  For
+efficiency purposes this capability is only a desired feature for CIPSO
+routers.
+
+
+5.4    Label of ICMP Messages
+
+The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent
+to the label of the datagram that caused the ICMP message.  If the ICMP was
+generated due to a problem associated with the original CIPSO label then the
+following responses are allowed:
+
+  a.  Use the CIPSO label of the original IP datagram
+  b.  Drop the original datagram with no return message generated
+
+In most cases these options will have the same effect.  If you can not
+interpret the label or if it is outside the label range of your host or
+interface then an ICMP message with the same label will probably not be
+able to exit the system.
+
+
+6.    Assignment of DOI Identifier Numbers                                   =
+
+Requests for assignment of a DOI identifier number should be addressed to
+the Internet Assigned Numbers Authority (IANA).
+
+
+7.    Acknowledgements
+
+Much of the material in this RFC is based on (and copied from) work
+done by Gary Winiger of Sun Microsystems and published as Commercial
+IP Security Option at the INTEROP 89, Commercial IPSO Workshop.
+
+
+8.    Author's Address
+
+To submit mail for distribution to members of the IETF CIPSO Working
+Group, send mail to: cipso@wdl1.wdl.loral.com.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 11]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+To be added to or deleted from this distribution, send mail to:
+cipso-request@wdl1.wdl.loral.com.
+
+
+9.    References
+
+RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January
+1988.
+
+RFC 1108, "U.S. Department of Defense Security Options
+for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 12]
+
+
+
diff -puN /dev/null Documentation/netlabel/introduction.txt
--- /dev/null
+++ a/Documentation/netlabel/introduction.txt
@@ -0,0 +1,46 @@
+NetLabel Introduction
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+August 2, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can be used by kernel security modules to attach
+security attributes to outgoing network packets generated from user space
+applications and read security attributes from incoming network packets.  It
+is composed of three main components, the protocol engines, the communication
+layer, and the kernel security module API.
+
+ * Protocol Engines
+
+The protocol engines are responsible for both applying and retrieving the
+network packet's security attributes.  If any translation between the network
+security attributes and those on the host are required then the protocol
+engine will handle those tasks as well.  Other kernel subsystems should
+refrain from calling the protocol engines directly, instead they should use
+the NetLabel kernel security module API described below.
+
+Detailed information about each NetLabel protocol engine can be found in this
+directory, consult '00-INDEX' for filenames.
+
+ * Communication Layer
+
+The communication layer exists to allow NetLabel configuration and monitoring
+from user space.  The NetLabel communication layer uses a message based
+protocol built on top of the Generic NETLINK transport mechanism.  The exact
+formatting of these NetLabel messages as well as the Generic NETLINK family
+names can be found in the the 'net/netlabel/' directory as comments in the
+header files as well as in 'include/net/netlabel.h'.
+
+ * Security Module API
+
+The purpose of the NetLabel security module API is to provide a protocol
+independent interface to the underlying NetLabel protocol engines.  In addition
+to protocol independence, the security module API is designed to be completely
+LSM independent which should allow multiple LSMs to leverage the same code
+base.
+
+Detailed information about the NetLabel security module API can be found in the
+'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
+found in this directory.
diff -puN /dev/null Documentation/netlabel/lsm_interface.txt
--- /dev/null
+++ a/Documentation/netlabel/lsm_interface.txt
@@ -0,0 +1,47 @@
+NetLabel Linux Security Module Interface
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can set and retrieve security attributes from
+network packets.  It is intended to be used by LSM developers who want to make
+use of a common code base for several different packet labeling protocols.
+The NetLabel security module API is defined in 'include/net/netlabel.h' but a
+brief overview is given below.
+
+ * NetLabel Security Attributes
+
+Since NetLabel supports multiple different packet labeling protocols and LSMs
+it uses the concept of security attributes to refer to the packet's security
+labels.  The NetLabel security attributes are defined by the
+'netlbl_lsm_secattr' structure in the NetLabel header file.  Internally the
+NetLabel subsystem converts the security attributes to and from the correct
+low-level packet label depending on the NetLabel build time and run time
+configuration.  It is up to the LSM developer to translate the NetLabel
+security attributes into whatever security identifiers are in use for their
+particular LSM.
+
+ * NetLabel LSM Protocol Operations
+
+These are the functions which allow the LSM developer to manipulate the labels
+on outgoing packets as well as read the labels on incoming packets.  Functions
+exist to operate both on sockets as well as the sk_buffs directly.  These high
+level functions are translated into low level protocol operations based on how
+the administrator has configured the NetLabel subsystem.
+
+ * NetLabel Label Mapping Cache Operations
+
+Depending on the exact configuration, translation between the network packet
+label and the internal LSM security identifier can be time consuming.  The
+NetLabel label mapping cache is a caching mechanism which can be used to
+sidestep much of this overhead once a mapping has been established.  Once the
+LSM has received a packet, used NetLabel to decode it's security attributes,
+and translated the security attributes into a LSM internal identifier the LSM
+can use the NetLabel caching functions to associate the LSM internal
+identifier with the network packet's label.  This means that in the future
+when a incoming packet matches a cached value not only are the internal
+NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
+bypassed as well which should result in a significant reduction in overhead.
diff -puN Documentation/networking/ip-sysctl.txt~git-net Documentation/networking/ip-sysctl.txt
--- a/Documentation/networking/ip-sysctl.txt~git-net
+++ a/Documentation/networking/ip-sysctl.txt
@@ -369,6 +369,41 @@ tcp_slow_start_after_idle - BOOLEAN
 	be timed out after an idle period.
 	Default: 1
 
+CIPSOv4 Variables:
+
+cipso_cache_enable - BOOLEAN
+	If set, enable additions to and lookups from the CIPSO label mapping
+	cache.  If unset, additions are ignored and lookups always result in a
+	miss.  However, regardless of the setting the cache is still
+	invalidated when required when means you can safely toggle this on and
+	off and the cache will always be "safe".
+	Default: 1
+
+cipso_cache_bucket_size - INTEGER
+	The CIPSO label cache consists of a fixed size hash table with each
+	hash bucket containing a number of cache entries.  This variable limits
+	the number of entries in each hash bucket; the larger the value the
+	more CIPSO label mappings that can be cached.  When the number of
+	entries in a given hash bucket reaches this limit adding new entries
+	causes the oldest entry in the bucket to be removed to make room.
+	Default: 10
+
+cipso_rbm_optfmt - BOOLEAN
+	Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
+	the CIPSO draft specification (see Documentation/netlabel for details).
+	This means that when set the CIPSO tag will be padded with empty
+	categories in order to make the packet data 32-bit aligned.
+	Default: 0
+
+cipso_rbm_structvalid - BOOLEAN
+	If set, do a very strict check of the CIPSO option when
+	ip_options_compile() is called.  If unset, relax the checks done during
+	ip_options_compile().  Either way is "safe" as errors are caught else
+	where in the CIPSO processing code but setting this to 0 (False) should
+	result in less work (i.e. it should be faster) but could cause problems
+	with other implementations that require strict checking.
+	Default: 0
+
 IP Variables:
 
 ip_local_port_range - 2 INTEGERS
diff -puN /dev/null Documentation/networking/secid.txt
--- /dev/null
+++ a/Documentation/networking/secid.txt
@@ -0,0 +1,14 @@
+flowi structure:
+
+The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
+the label of the flow. This label of the flow is currently used in selecting
+matching labeled xfrm(s).
+
+If this is an outbound flow, the label is derived from the socket, if any, or
+the incoming packet this flow is being generated as a response to (e.g. tcp
+resets, timewait ack, etc.). It is also conceivable that the label could be
+derived from other sources such as process context, device, etc., in special
+cases, as may be appropriate.
+
+If this is an inbound flow, the label is derived from the IPSec security
+associations, if any, used by the packet.
diff -puN Makefile~git-net Makefile
--- a/Makefile~git-net
+++ a/Makefile
@@ -436,12 +436,13 @@ core-y		:= usr/
 endif # KBUILD_EXTMOD
 
 ifeq ($(dot-config),1)
-# In this section, we need .config
+# Read in config
+-include include/config/auto.conf
 
+ifeq ($(KBUILD_EXTMOD),)
 # Read in dependencies to all Kconfig* files, make sure to run
 # oldconfig if changes are detected.
 -include include/config/auto.conf.cmd
--include include/config/auto.conf
 
 # To avoid any implicit rule to kick in, define an empty command
 $(KCONFIG_CONFIG) include/config/auto.conf.cmd: ;
@@ -451,16 +452,27 @@ $(KCONFIG_CONFIG) include/config/auto.co
 # if auto.conf.cmd is missing then we are probably in a cleaned tree so
 # we execute the config step to be sure to catch updated Kconfig files
 include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
-ifeq ($(KBUILD_EXTMOD),)
 	$(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
 else
-	$(error kernel configuration not valid - run 'make prepare' in $(srctree) to update it)
-endif
+# external modules needs include/linux/autoconf.h and include/config/auto.conf
+# but do not care if they are up-to-date. Use auto.conf to trigger the test
+PHONY += include/config/auto.conf
+
+include/config/auto.conf:
+	$(Q)test -e include/linux/autoconf.h -a -e $@ || (		\
+	echo;								\
+	echo "  ERROR: Kernel configuration is invalid.";		\
+	echo "         include/linux/autoconf.h or $@ are missing.";	\
+	echo "         Run 'make oldconfig && make prepare' on kernel src to fix it.";	\
+	echo;								\
+	/bin/false)
+
+endif # KBUILD_EXTMOD
 
 else
 # Dummy target needed, because used as prerequisite
 include/config/auto.conf: ;
-endif
+endif # $(dot-config)
 
 # The all: target is the default when no target is given on the
 # command line.
diff -puN drivers/atm/he.c~git-net drivers/atm/he.c
--- a/drivers/atm/he.c~git-net
+++ a/drivers/atm/he.c
@@ -1912,7 +1912,7 @@ he_service_rbrq(struct he_dev *he_dev, i
 				skb->tail = skb->data + skb->len;
 #ifdef USE_CHECKSUM_HW
 				if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_COMPLETE;
 					skb->csum = TCP_CKSUM(skb->data,
 							he_vcc->pdu_len);
 				}
diff -puN drivers/net/3c59x.c~git-net drivers/net/3c59x.c
--- a/drivers/net/3c59x.c~git-net
+++ a/drivers/net/3c59x.c
@@ -2076,7 +2076,7 @@ boomerang_start_xmit(struct sk_buff *skb
 
 	vp->tx_ring[entry].next = 0;
 #if DO_ZEROCOPY
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
 	else
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
diff -puN drivers/net/8139cp.c~git-net drivers/net/8139cp.c
--- a/drivers/net/8139cp.c~git-net
+++ a/drivers/net/8139cp.c
@@ -813,7 +813,7 @@ static int cp_start_xmit (struct sk_buff
 
 		if (mss)
 			flags |= LargeSend | ((mss & MSSMask) << MSSShift);
-		else if (skb->ip_summed == CHECKSUM_HW) {
+		else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			const struct iphdr *ip = skb->nh.iph;
 			if (ip->protocol == IPPROTO_TCP)
 				flags |= IPCS | TCPCS;
@@ -867,7 +867,7 @@ static int cp_start_xmit (struct sk_buff
 			if (mss)
 				ctrl |= LargeSend |
 					((mss & MSSMask) << MSSShift);
-			else if (skb->ip_summed == CHECKSUM_HW) {
+			else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				if (ip->protocol == IPPROTO_TCP)
 					ctrl |= IPCS | TCPCS;
 				else if (ip->protocol == IPPROTO_UDP)
@@ -898,7 +898,7 @@ static int cp_start_xmit (struct sk_buff
 		txd->addr = cpu_to_le64(first_mapping);
 		wmb();
 
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			if (ip->protocol == IPPROTO_TCP)
 				txd->opts1 = cpu_to_le32(first_eor | first_len |
 							 FirstFrag | DescOwn |
diff -puN drivers/net/acenic.c~git-net drivers/net/acenic.c
--- a/drivers/net/acenic.c~git-net
+++ a/drivers/net/acenic.c
@@ -2036,7 +2036,7 @@ static void ace_rx_int(struct net_device
 		 */
 		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
 			skb->csum = htons(csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else {
 			skb->ip_summed = CHECKSUM_NONE;
 		}
@@ -2507,7 +2507,7 @@ restart:
 
 		mapping = ace_map_tx_skb(ap, skb, skb, idx);
 		flagsize = (skb->len << 16) | (BD_FLG_END);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2530,7 +2530,7 @@ restart:
 
 		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
 		flagsize = (skb_headlen(skb) << 16);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2556,7 +2556,7 @@ restart:
 					       PCI_DMA_TODEVICE);
 
 			flagsize = (frag->size << 16);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				flagsize |= BD_FLG_TCP_UDP_SUM;
 			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
 
diff -puN drivers/net/arcnet/com20020-pci.c~git-net drivers/net/arcnet/com20020-pci.c
--- a/drivers/net/arcnet/com20020-pci.c~git-net
+++ a/drivers/net/arcnet/com20020-pci.c
@@ -161,6 +161,7 @@ static struct pci_device_id com20020pci_
 	{ 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
+	{ 0x10B5, 0x9030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x10B5, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{0,}
 };
diff -puN drivers/net/bnx2.c~git-net drivers/net/bnx2.c
--- a/drivers/net/bnx2.c~git-net
+++ a/drivers/net/bnx2.c
@@ -4418,7 +4418,7 @@ bnx2_start_xmit(struct sk_buff *skb, str
 	ring_prod = TX_RING_IDX(prod);
 
 	vlan_tag_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM;
 	}
 
diff -puN drivers/net/cassini.c~git-net drivers/net/cassini.c
--- a/drivers/net/cassini.c~git-net
+++ a/drivers/net/cassini.c
@@ -2167,7 +2167,7 @@ end_copy_pkt:
 			cas_page_unmap(addr);
 	}
 	skb->csum = ntohs(i ^ 0xffff);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_COMPLETE;
 	skb->protocol = eth_type_trans(skb, cp->dev);
 	return len;
 }
@@ -2821,7 +2821,7 @@ static inline int cas_xmit_tx_ringN(stru
 	}
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
diff -puN drivers/net/chelsio/sge.c~git-net drivers/net/chelsio/sge.c
--- a/drivers/net/chelsio/sge.c~git-net
+++ a/drivers/net/chelsio/sge.c
@@ -1470,9 +1470,9 @@ int t1_start_xmit(struct sk_buff *skb, s
 		}
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
-		    skb->ip_summed == CHECKSUM_HW &&
+		    skb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb->nh.iph->protocol == IPPROTO_UDP)
-			if (unlikely(skb_checksum_help(skb, 0))) {
+			if (unlikely(skb_checksum_help(skb))) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
 			}
@@ -1495,11 +1495,11 @@ int t1_start_xmit(struct sk_buff *skb, s
 		cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl));
 		cpl->opcode = CPL_TX_PKT;
 		cpl->ip_csum_dis = 1;    /* SW calculates IP csum */
-		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1;
+		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1;
 		/* the length field isn't used so don't bother setting it */
 
-		st->tx_cso += (skb->ip_summed == CHECKSUM_HW);
-		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW);
+		st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL);
+		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL);
 		sge->stats.tx_reg_pkts++;
 	}
 	cpl->iff = dev->if_port;
diff -puN drivers/net/dl2k.c~git-net drivers/net/dl2k.c
--- a/drivers/net/dl2k.c~git-net
+++ a/drivers/net/dl2k.c
@@ -611,7 +611,7 @@ start_xmit (struct sk_buff *skb, struct 
 	txdesc = &np->tx_ring[entry];
 
 #if 0
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdesc->status |=
 		    cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable |
 				 IPChecksumEnable);
diff -puN drivers/net/e1000/e1000_main.c~git-net drivers/net/e1000/e1000_main.c
--- a/drivers/net/e1000/e1000_main.c~git-net
+++ a/drivers/net/e1000/e1000_main.c
@@ -2605,7 +2605,7 @@ e1000_tx_csum(struct e1000_adapter *adap
 	unsigned int i;
 	uint8_t css;
 
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 
 		i = tx_ring->next_to_use;
@@ -2932,11 +2932,11 @@ e1000_xmit_frame(struct sk_buff *skb, st
 	}
 
 	/* reserve a descriptor for the offload context */
-	if ((mss) || (skb->ip_summed == CHECKSUM_HW))
+	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
 		count++;
 	count++;
 #else
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		count++;
 #endif
 
@@ -3613,7 +3613,7 @@ e1000_rx_checksum(struct e1000_adapter *
 		 */
 		csum = ntohl(csum ^ 0xFFFF);
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 	adapter->hw_csum_good++;
 }
diff -puN drivers/net/forcedeth.c~git-net drivers/net/forcedeth.c
--- a/drivers/net/forcedeth.c~git-net
+++ a/drivers/net/forcedeth.c
@@ -1521,7 +1521,8 @@ static int nv_start_xmit(struct sk_buff 
 		tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
 	else
 #endif
-	tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+	tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+			 NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
 	/* vlan tag */
 	if (np->vlangrp && vlan_tx_tag_present(skb)) {
diff -puN drivers/net/gianfar.c~git-net drivers/net/gianfar.c
--- a/drivers/net/gianfar.c~git-net
+++ a/drivers/net/gianfar.c
@@ -947,7 +947,7 @@ static int gfar_start_xmit(struct sk_buf
 
 	/* Set up checksumming */
 	if (likely((dev->features & NETIF_F_IP_CSUM)
-			&& (CHECKSUM_HW == skb->ip_summed))) {
+			&& (CHECKSUM_PARTIAL == skb->ip_summed))) {
 		fcb = gfar_add_fcb(skb, txbdp);
 		status |= TXBD_TOE;
 		gfar_tx_checksum(skb, fcb);
diff -puN drivers/net/hamachi.c~git-net drivers/net/hamachi.c
--- a/drivers/net/hamachi.c~git-net
+++ a/drivers/net/hamachi.c
@@ -1648,7 +1648,7 @@ static int hamachi_rx(struct net_device 
 						* could do the pseudo myself and return
 						* CHECKSUM_UNNECESSARY
 						*/
-						skb->ip_summed = CHECKSUM_HW;
+						skb->ip_summed = CHECKSUM_COMPLETE;
 					}
 				}	
 			}
diff -puN drivers/net/ibm_emac/ibm_emac_core.c~git-net drivers/net/ibm_emac/ibm_emac_core.c
--- a/drivers/net/ibm_emac/ibm_emac_core.c~git-net
+++ a/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1036,7 +1036,7 @@ static inline u16 emac_tx_csum(struct oc
 			       struct sk_buff *skb)
 {
 #if defined(CONFIG_IBM_EMAC_TAH)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		++dev->stats.tx_packets_csum;
 		return EMAC_TX_CTRL_TAH_CSUM;
 	}
diff -puN drivers/net/ioc3-eth.c~git-net drivers/net/ioc3-eth.c
--- a/drivers/net/ioc3-eth.c~git-net
+++ a/drivers/net/ioc3-eth.c
@@ -1387,7 +1387,7 @@ static int ioc3_start_xmit(struct sk_buf
 	 * MAC header which should not be summed and the TCP/UDP pseudo headers
 	 * manually.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int proto = ntohs(skb->nh.iph->protocol);
 		unsigned int csoff;
 		struct iphdr *ih = skb->nh.iph;
diff -puN drivers/net/irda/ali-ircc.c~git-net drivers/net/irda/ali-ircc.c
--- a/drivers/net/irda/ali-ircc.c~git-net
+++ a/drivers/net/irda/ali-ircc.c
@@ -249,7 +249,7 @@ static void __exit ali_ircc_cleanup(void
 
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			ali_ircc_close(dev_self[i]);
 	}
@@ -273,6 +273,12 @@ static int ali_ircc_open(int i, chipio_t
 	int err;
 			
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
+
+	if (i >= ARRAY_SIZE(dev_self)) {
+		IRDA_ERROR("%s(), maximum number of supported chips reached!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
 	
 	/* Set FIR FIFO and DMA Threshold */
 	if ((ali_ircc_setup(info)) == -1)
diff -puN drivers/net/irda/irport.c~git-net drivers/net/irda/irport.c
--- a/drivers/net/irda/irport.c~git-net
+++ a/drivers/net/irda/irport.c
@@ -1090,7 +1090,7 @@ static int __init irport_init(void)
 {
  	int i;
 
- 	for (i=0; (io[i] < 2000) && (i < 4); i++) {
+ 	for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) {
  		if (irport_open(i, io[i], irq[i]) != NULL)
  			return 0;
  	}
@@ -1112,7 +1112,7 @@ static void __exit irport_cleanup(void)
 
         IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
  		if (dev_self[i])
  			irport_close(dev_self[i]);
  	}
diff -puN drivers/net/irda/via-ircc.c~git-net drivers/net/irda/via-ircc.c
--- a/drivers/net/irda/via-ircc.c~git-net
+++ a/drivers/net/irda/via-ircc.c
@@ -279,7 +279,7 @@ static void via_ircc_clean(void)
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			via_ircc_close(dev_self[i]);
 	}
@@ -327,6 +327,9 @@ static __devinit int via_ircc_open(int i
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
+	if (i >= ARRAY_SIZE(dev_self))
+		return -ENOMEM;
+
 	/* Allocate new instance of the driver */
 	dev = alloc_irdadev(sizeof(struct via_ircc_cb));
 	if (dev == NULL) 
diff -puN drivers/net/irda/w83977af_ir.c~git-net drivers/net/irda/w83977af_ir.c
--- a/drivers/net/irda/w83977af_ir.c~git-net
+++ a/drivers/net/irda/w83977af_ir.c
@@ -116,7 +116,7 @@ static int __init w83977af_init(void)
 
 	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
 
-	for (i=0; (io[i] < 2000) && (i < 4); i++) { 
+	for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) {
 		if (w83977af_open(i, io[i], irq[i], dma[i]) == 0)
 			return 0;
 	}
@@ -135,7 +135,7 @@ static void __exit w83977af_cleanup(void
 
         IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			w83977af_close(dev_self[i]);
 	}
diff -puN drivers/net/ixgb/ixgb_main.c~git-net drivers/net/ixgb/ixgb_main.c
--- a/drivers/net/ixgb/ixgb_main.c~git-net
+++ a/drivers/net/ixgb/ixgb_main.c
@@ -1243,7 +1243,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapte
 	unsigned int i;
 	uint8_t css, cso;
 
-	if(likely(skb->ip_summed == CHECKSUM_HW)) {
+	if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 		cso = (skb->h.raw + skb->csum) - skb->data;
 
diff -puN drivers/net/mv643xx_eth.c~git-net drivers/net/mv643xx_eth.c
--- a/drivers/net/mv643xx_eth.c~git-net
+++ a/drivers/net/mv643xx_eth.c
@@ -1145,7 +1145,7 @@ static void eth_tx_submit_descs_for_skb(
 	desc->byte_cnt = length;
 	desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		BUG_ON(skb->protocol != ETH_P_IP);
 
 		cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
diff -puN drivers/net/myri10ge/myri10ge.c~git-net drivers/net/myri10ge/myri10ge.c
--- a/drivers/net/myri10ge/myri10ge.c~git-net
+++ a/drivers/net/myri10ge/myri10ge.c
@@ -930,7 +930,7 @@ static inline void myri10ge_vlan_ip_csum
 	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
 	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
 		skb->csum = hw_csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 }
 
@@ -973,7 +973,7 @@ myri10ge_rx_done(struct myri10ge_priv *m
 		if ((skb->protocol == ntohs(ETH_P_IP)) ||
 		    (skb->protocol == ntohs(ETH_P_IPV6))) {
 			skb->csum = ntohs((u16) csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else
 			myri10ge_vlan_ip_csum(skb, ntohs((u16) csum));
 	}
@@ -1897,13 +1897,13 @@ again:
 	pseudo_hdr_offset = 0;
 	odd_flag = 0;
 	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		cksum_offset = (skb->h.raw - skb->data);
 		pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
 		if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) {
-			if (skb_checksum_help(skb, 0))
+			if (skb_checksum_help(skb))
 				goto drop;
 			cksum_offset = 0;
 			pseudo_hdr_offset = 0;
diff -puN drivers/net/ns83820.c~git-net drivers/net/ns83820.c
--- a/drivers/net/ns83820.c~git-net
+++ a/drivers/net/ns83820.c
@@ -1157,7 +1157,7 @@ again:
 	if (!nr_frags)
 		frag = NULL;
 	extsts = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		extsts |= EXTSTS_IPPKT;
 		if (IPPROTO_TCP == skb->nh.iph->protocol)
 			extsts |= EXTSTS_TCPPKT;
diff -puN drivers/net/r8169.c~git-net drivers/net/r8169.c
--- a/drivers/net/r8169.c~git-net
+++ a/drivers/net/r8169.c
@@ -2169,7 +2169,7 @@ static inline u32 rtl8169_tso_csum(struc
 		if (mss)
 			return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const struct iphdr *ip = skb->nh.iph;
 
 		if (ip->protocol == IPPROTO_TCP)
diff -puN drivers/net/s2io.c~git-net drivers/net/s2io.c
--- a/drivers/net/s2io.c~git-net
+++ a/drivers/net/s2io.c
@@ -3893,7 +3893,7 @@ static int s2io_xmit(struct sk_buff *skb
 		txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb));
 	}
 #endif
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdp->Control_2 |=
 		    (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
 		     TXD_TX_CKO_UDP_EN);
diff -puN drivers/net/sk98lin/skge.c~git-net drivers/net/sk98lin/skge.c
--- a/drivers/net/sk98lin/skge.c~git-net
+++ a/drivers/net/sk98lin/skge.c
@@ -1559,7 +1559,7 @@ struct sk_buff	*pMessage)	/* pointer to 
 	pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32);
 	pTxd->pMBuf     = pMessage;
 
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -1678,7 +1678,7 @@ struct sk_buff	*pMessage)	/* pointer to 
 	/* 
 	** Does the HW need to evaluate checksum for TCP or UDP packets? 
 	*/
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -2158,7 +2158,7 @@ rx_start:	
 
 #ifdef USE_SK_RX_CHECKSUM
 		pMsg->csum = pRxd->TcpSums & 0xffff;
-		pMsg->ip_summed = CHECKSUM_HW;
+		pMsg->ip_summed = CHECKSUM_COMPLETE;
 #else
 		pMsg->ip_summed = CHECKSUM_NONE;
 #endif
diff -puN drivers/net/skge.c~git-net drivers/net/skge.c
--- a/drivers/net/skge.c~git-net
+++ a/drivers/net/skge.c
@@ -2338,7 +2338,7 @@ static int skge_xmit_frame(struct sk_buf
 	td->dma_lo = map;
 	td->dma_hi = map >> 32;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int offset = skb->h.raw - skb->data;
 
 		/* This seems backwards, but it is what the sk98lin
@@ -2642,7 +2642,7 @@ static inline struct sk_buff *skge_rx_ge
 	skb->dev = skge->netdev;
 	if (skge->rx_csum) {
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 
 	skb->protocol = eth_type_trans(skb, skge->netdev);
diff -puN drivers/net/sky2.c~git-net drivers/net/sky2.c
--- a/drivers/net/sky2.c~git-net
+++ a/drivers/net/sky2.c
@@ -1168,7 +1168,7 @@ static unsigned tx_le_req(const struct s
 	if (skb_is_gso(skb))
 		++count;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		++count;
 
 	return count;
@@ -1277,7 +1277,7 @@ static int sky2_xmit_frame(struct sk_buf
 #endif
 
 	/* Handle TCP checksum offload */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdr = skb->h.raw - skb->data;
 		u16 offset = hdr + skb->csum;
 
@@ -1999,7 +1999,7 @@ static int sky2_status_intr(struct sky2_
 #endif
 		case OP_RXCHKS:
 			skb = sky2->rx_ring[sky2->rx_next].skb;
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(status);
 			break;
 
diff -puN drivers/net/starfire.c~git-net drivers/net/starfire.c
--- a/drivers/net/starfire.c~git-net
+++ a/drivers/net/starfire.c
@@ -1230,7 +1230,7 @@ static int start_tx(struct sk_buff *skb,
 	}
 
 #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK))
 			return NETDEV_TX_OK;
 	}
@@ -1252,7 +1252,7 @@ static int start_tx(struct sk_buff *skb,
 				status |= TxDescIntr;
 				np->reap_tx = 0;
 			}
-			if (skb->ip_summed == CHECKSUM_HW) {
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				status |= TxCalTCP;
 				np->stats.tx_compressed++;
 			}
@@ -1499,7 +1499,7 @@ static int __netdev_rx(struct net_device
 		 * Until then, the printk stays. :-) -Ion
 		 */
 		else if (le16_to_cpu(desc->status2) & 0x0040) {
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(desc->csum);
 			printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2));
 		}
diff -puN drivers/net/sungem.c~git-net drivers/net/sungem.c
--- a/drivers/net/sungem.c~git-net
+++ a/drivers/net/sungem.c
@@ -855,7 +855,7 @@ static int gem_rx(struct gem *gp, int wo
 		}
 
 		skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->protocol = eth_type_trans(skb, gp->dev);
 
 		netif_receive_skb(skb);
@@ -1026,7 +1026,7 @@ static int gem_start_xmit(struct sk_buff
 	unsigned long flags;
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
diff -puN drivers/net/sunhme.c~git-net drivers/net/sunhme.c
--- a/drivers/net/sunhme.c~git-net
+++ a/drivers/net/sunhme.c
@@ -1207,7 +1207,7 @@ static void happy_meal_transceiver_check
  * flags, thus:
  *
  * 	skb->csum = rxd->rx_flags & 0xffff;
- * 	skb->ip_summed = CHECKSUM_HW;
+ * 	skb->ip_summed = CHECKSUM_COMPLETE;
  *
  * before sending off the skb to the protocols, and we are good as gold.
  */
@@ -2074,7 +2074,7 @@ static void happy_meal_rx(struct happy_m
 
 		/* This card is _fucking_ hot... */
 		skb->csum = ntohs(csum ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 
 		RXD(("len=%d csum=%4x]", len, csum));
 		skb->protocol = eth_type_trans(skb, dev);
@@ -2268,7 +2268,7 @@ static int happy_meal_start_xmit(struct 
  	u32 tx_flags;
 
 	tx_flags = TXFLAG_OWN;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u32 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u32) (skb->h.raw - skb->data);
diff -puN drivers/net/tg3.c~git-net drivers/net/tg3.c
--- a/drivers/net/tg3.c~git-net
+++ a/drivers/net/tg3.c
@@ -68,8 +68,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.64"
-#define DRV_MODULE_RELDATE	"July 31, 2006"
+#define DRV_MODULE_VERSION	"3.65"
+#define DRV_MODULE_RELDATE	"August 07, 2006"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -123,9 +123,6 @@
 			           TG3_RX_RCB_RING_SIZE(tp))
 #define TG3_TX_RING_BYTES	(sizeof(struct tg3_tx_buffer_desc) * \
 				 TG3_TX_RING_SIZE)
-#define TX_BUFFS_AVAIL(TP)						\
-	((TP)->tx_pending -						\
-	 (((TP)->tx_prod - (TP)->tx_cons) & (TG3_TX_RING_SIZE - 1)))
 #define NEXT_TX(N)		(((N) + 1) & (TG3_TX_RING_SIZE - 1))
 
 #define RX_PKT_BUF_SZ		(1536 + tp->rx_offset + 64)
@@ -267,7 +264,7 @@ static struct pci_device_id tg3_pci_tbl[
 
 MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_stats_keys[TG3_NUM_STATS] = {
 	{ "rx_octets" },
@@ -348,7 +345,7 @@ static struct {
 	{ "nic_tx_threshold_hit" }
 };
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_test_keys[TG3_NUM_TEST] = {
 	{ "nvram test     (online) " },
@@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *t
 	spin_unlock(&tp->lock);
 }
 
+static inline u32 tg3_tx_avail(struct tg3 *tp)
+{
+	smp_mb();
+	return (tp->tx_pending -
+		((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1)));
+}
+
 /* Tigon3 never reports partial packet sends.  So we do not
  * need special logic to handle SKBs that have not had all
  * of their frags sent yet, like SunGEM does.
@@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp)
 
 	tp->tx_cons = sw_idx;
 
-	if (unlikely(netif_queue_stopped(tp->dev))) {
-		spin_lock(&tp->tx_lock);
+	/* Need to make the tx_cons update visible to tg3_start_xmit()
+	 * before checking for netif_queue_stopped().  Without the
+	 * memory barrier, there is a small possibility that tg3_start_xmit()
+	 * will miss it and cause the queue to be stopped forever.
+	 */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(tp->dev) &&
+		     (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))) {
+		netif_tx_lock(tp->dev);
 		if (netif_queue_stopped(tp->dev) &&
-		    (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH))
+		    (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))
 			netif_wake_queue(tp->dev);
-		spin_unlock(&tp->tx_lock);
+		netif_tx_unlock(tp->dev);
 	}
 }
 
@@ -3101,7 +3113,6 @@ static int tg3_alloc_rx_skb(struct tg3 *
 	if (skb == NULL)
 		return -ENOMEM;
 
-	skb->dev = tp->dev;
 	skb_reserve(skb, tp->rx_offset);
 
 	mapping = pci_map_single(tp->pdev, skb->data,
@@ -3274,7 +3285,6 @@ static int tg3_rx(struct tg3 *tp, int bu
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
-			copy_skb->dev = tp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
@@ -3797,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
-	if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+	if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 
@@ -3841,11 +3851,11 @@ static int tg3_start_xmit(struct sk_buff
 		skb->h.th->check = 0;
 
 	}
-	else if (skb->ip_summed == CHECKSUM_HW)
+	else if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #else
 	mss = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #endif
 #if TG3_VLAN_TAG_USED
@@ -3893,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff
 	tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
 
 	tp->tx_prod = entry;
-	if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) {
-		spin_lock(&tp->tx_lock);
+	if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
-		if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)
+		if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
 			netif_wake_queue(tp->dev);
-		spin_unlock(&tp->tx_lock);
 	}
 
 out_unlock:
@@ -3920,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, s
 	struct sk_buff *segs, *nskb;
 
 	/* Estimate the number of fragments in the worst case */
-	if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) {
+	if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) {
 		netif_stop_queue(tp->dev);
 		return NETDEV_TX_BUSY;
 	}
@@ -3960,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
-	if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+	if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 
@@ -3973,7 +3981,7 @@ static int tg3_start_xmit_dma_bug(struct
 
 	entry = tp->tx_prod;
 	base_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #if TG3_TSO_SUPPORT != 0
 	mss = 0;
@@ -4110,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct
 	tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
 
 	tp->tx_prod = entry;
-	if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) {
-		spin_lock(&tp->tx_lock);
+	if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
-		if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)
+		if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
 			netif_wake_queue(tp->dev);
-		spin_unlock(&tp->tx_lock);
 	}
 
 out_unlock:
@@ -4963,7 +4969,7 @@ static int tg3_halt(struct tg3 *tp, int 
 #define TG3_FW_BSS_ADDR		0x08000a70
 #define TG3_FW_BSS_LEN		0x10
 
-static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
 	0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
 	0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
 	0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
@@ -5057,7 +5063,7 @@ static u32 tg3FwText[(TG3_FW_TEXT_LEN / 
 	0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
 };
 
-static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
 	0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
 	0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
@@ -5122,13 +5128,13 @@ static int tg3_halt_cpu(struct tg3 *tp, 
 struct fw_info {
 	unsigned int text_base;
 	unsigned int text_len;
-	u32 *text_data;
+	const u32 *text_data;
 	unsigned int rodata_base;
 	unsigned int rodata_len;
-	u32 *rodata_data;
+	const u32 *rodata_data;
 	unsigned int data_base;
 	unsigned int data_len;
-	u32 *data_data;
+	const u32 *data_data;
 };
 
 /* tp->lock is held. */
@@ -5260,7 +5266,7 @@ static int tg3_load_5701_a0_firmware_fix
 #define TG3_TSO_FW_BSS_ADDR		0x08001b80
 #define TG3_TSO_FW_BSS_LEN		0x894
 
-static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
 	0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800,
 	0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5547,7 +5553,7 @@ static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT
 	0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000,
 };
 
-static u32 tg3TsoFwRodata[] = {
+static const u32 tg3TsoFwRodata[] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f,
 	0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000,
@@ -5555,7 +5561,7 @@ static u32 tg3TsoFwRodata[] = {
 	0x00000000,
 };
 
-static u32 tg3TsoFwData[] = {
+static const u32 tg3TsoFwData[] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000,
@@ -5577,7 +5583,7 @@ static u32 tg3TsoFwData[] = {
 #define TG3_TSO5_FW_BSS_ADDR		0x00010f50
 #define TG3_TSO5_FW_BSS_LEN		0x88
 
-static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
 	0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001,
 	0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5736,14 +5742,14 @@ static u32 tg3Tso5FwText[(TG3_TSO5_FW_TE
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000,
 	0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000,
 };
@@ -11474,7 +11480,6 @@ static int __devinit tg3_init_one(struct
 	tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
 #endif
 	spin_lock_init(&tp->lock);
-	spin_lock_init(&tp->tx_lock);
 	spin_lock_init(&tp->indirect_lock);
 	INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
 
diff -puN drivers/net/tg3.h~git-net drivers/net/tg3.h
--- a/drivers/net/tg3.h~git-net
+++ a/drivers/net/tg3.h
@@ -2079,9 +2079,9 @@ struct tg3 {
 	 * lock: Held during reset, PHY access, timer, and when
 	 *       updating tg3_flags and tg3_flags2.
 	 *
-	 * tx_lock: Held during tg3_start_xmit and tg3_tx only
-	 *          when calling netif_[start|stop]_queue.
-	 *          tg3_start_xmit is protected by netif_tx_lock.
+	 * netif_tx_lock: Held during tg3_start_xmit. tg3_tx holds
+	 *                netif_tx_lock when it needs to call
+	 *                netif_wake_queue.
 	 *
 	 * Both of these locks are to be held with BH safety.
 	 *
@@ -2118,8 +2118,6 @@ struct tg3 {
 	u32				tx_cons;
 	u32				tx_pending;
 
-	spinlock_t			tx_lock;
-
 	struct tg3_tx_buffer_desc	*tx_ring;
 	struct tx_ring_info		*tx_buffers;
 	dma_addr_t			tx_desc_mapping;
diff -puN drivers/net/typhoon.c~git-net drivers/net/typhoon.c
--- a/drivers/net/typhoon.c~git-net
+++ a/drivers/net/typhoon.c
@@ -826,7 +826,7 @@ typhoon_start_tx(struct sk_buff *skb, st
 	first_txd->addrHi = (u64)((unsigned long) skb) >> 32;
 	first_txd->processFlags = 0;
 
-	if(skb->ip_summed == CHECKSUM_HW) {
+	if(skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* The 3XP will figure out if this is UDP/TCP */
 		first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM;
 		first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM;
diff -puN drivers/net/via-rhine.c~git-net drivers/net/via-rhine.c
--- a/drivers/net/via-rhine.c~git-net
+++ a/drivers/net/via-rhine.c
@@ -1230,7 +1230,7 @@ static int rhine_start_tx(struct sk_buff
 	rp->tx_skbuff[entry] = skb;
 
 	if ((rp->quirks & rqRhineI) &&
-	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) {
+	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) {
 		/* Must use alignment buffer. */
 		if (skb->len > PKT_BUF_SZ) {
 			/* packet too long, drop it */
diff -puN drivers/net/via-velocity.c~git-net drivers/net/via-velocity.c
--- a/drivers/net/via-velocity.c~git-net
+++ a/drivers/net/via-velocity.c
@@ -2002,7 +2002,7 @@ static int velocity_xmit(struct sk_buff 
 	 *	Handle hardware checksum
 	 */
 	if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
-				 && (skb->ip_summed == CHECKSUM_HW)) {
+				 && (skb->ip_summed == CHECKSUM_PARTIAL)) {
 		struct iphdr *ip = skb->nh.iph;
 		if (ip->protocol == IPPROTO_TCP)
 			td_ptr->tdesc1.TCR |= TCR0_TCPCK;
diff -puN /dev/null include/linux/fib_rules.h
--- /dev/null
+++ a/include/linux/fib_rules.h
@@ -0,0 +1,64 @@
+#ifndef __LINUX_FIB_RULES_H
+#define __LINUX_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/rtnetlink.h>
+
+/* rule is permanent, and cannot be deleted */
+#define FIB_RULE_PERMANENT	1
+
+struct fib_rule_hdr
+{
+	__u8		family;
+	__u8		dst_len;
+	__u8		src_len;
+	__u8		tos;
+
+	__u8		table;
+	__u8		res1;	/* reserved */
+	__u8		res2;	/* reserved */
+	__u8		action;
+
+	__u32		flags;
+};
+
+enum
+{
+	FRA_UNSPEC,
+	FRA_DST,	/* destination address */
+	FRA_SRC,	/* source address */
+	FRA_IFNAME,	/* interface name */
+	FRA_UNUSED1,
+	FRA_UNUSED2,
+	FRA_PRIORITY,	/* priority/preference */
+	FRA_UNUSED3,
+	FRA_UNUSED4,
+	FRA_UNUSED5,
+	FRA_FWMARK,	/* netfilter mark (IPv4) */
+	FRA_FLOW,	/* flow/class id */
+	FRA_UNUSED6,
+	FRA_UNUSED7,
+	FRA_UNUSED8,
+	FRA_TABLE,	/* Extended table id */
+	__FRA_MAX
+};
+
+#define FRA_MAX (__FRA_MAX - 1)
+
+enum
+{
+	FR_ACT_UNSPEC,
+	FR_ACT_TO_TBL,		/* Pass to fixed table */
+	FR_ACT_RES1,
+	FR_ACT_RES2,
+	FR_ACT_RES3,
+	FR_ACT_RES4,
+	FR_ACT_BLACKHOLE,	/* Drop without notification */
+	FR_ACT_UNREACHABLE,	/* Drop with ENETUNREACH */
+	FR_ACT_PROHIBIT,	/* Drop with EACCES */
+	__FR_ACT_MAX,
+};
+
+#define FR_ACT_MAX (__FR_ACT_MAX - 1)
+
+#endif
diff -puN include/linux/if.h~git-net include/linux/if.h
--- a/include/linux/if.h~git-net
+++ a/include/linux/if.h
@@ -212,5 +212,134 @@ struct ifconf 
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
+/* The struct should be in sync with struct net_device_stats */
+struct rtnl_link_stats
+{
+	__u32	rx_packets;		/* total packets received	*/
+	__u32	tx_packets;		/* total packets transmitted	*/
+	__u32	rx_bytes;		/* total bytes received 	*/
+	__u32	tx_bytes;		/* total bytes transmitted	*/
+	__u32	rx_errors;		/* bad packets received		*/
+	__u32	tx_errors;		/* packet transmit problems	*/
+	__u32	rx_dropped;		/* no space in linux buffers	*/
+	__u32	tx_dropped;		/* no space available in linux	*/
+	__u32	multicast;		/* multicast packets received	*/
+	__u32	collisions;
+
+	/* detailed rx_errors: */
+	__u32	rx_length_errors;
+	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u32	rx_frame_errors;	/* recv'd frame alignment error */
+	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u32	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u32	tx_aborted_errors;
+	__u32	tx_carrier_errors;
+	__u32	tx_fifo_errors;
+	__u32	tx_heartbeat_errors;
+	__u32	tx_window_errors;
+
+	/* for cslip etc */
+	__u32	rx_compressed;
+	__u32	tx_compressed;
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap
+{
+	__u64	mem_start;
+	__u64	mem_end;
+	__u64	base_addr;
+	__u16	irq;
+	__u8	dma;
+	__u8	port;
+};
+
+enum
+{
+	IFLA_UNSPEC,
+	IFLA_ADDRESS,
+	IFLA_BROADCAST,
+	IFLA_IFNAME,
+	IFLA_MTU,
+	IFLA_LINK,
+	IFLA_QDISC,
+	IFLA_STATS,
+	IFLA_COST,
+#define IFLA_COST IFLA_COST
+	IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+	IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+	IFLA_PROTINFO,		/* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+	IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+	IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+	IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
+	__IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* ifi_flags.
+
+   IFF_* flags.
+
+   The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   more not changeable by user. They describe link media
+   characteristics and set by device driver.
+
+   Comments:
+   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+   - If neither of these three flags are set;
+     the interface is NBMA.
+
+   - IFF_MULTICAST does not mean anything special:
+   multicasts can be used on all not-NBMA links.
+   IFF_MULTICAST means that this media uses special encapsulation
+   for multicast frames. Apparently, all IFF_POINTOPOINT and
+   IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it is equal ifi_index.
+   If it is a "virtual interface" (f.e. tunnel), ifi_link
+   can point to real physical interface (f.e. for bandwidth calculations),
+   or maybe 0, what means, that real media is unknown (usual
+   for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum
+{
+	IFLA_INET6_UNSPEC,
+	IFLA_INET6_FLAGS,	/* link flags			*/
+	IFLA_INET6_CONF,	/* sysctl parameters		*/
+	IFLA_INET6_STATS,	/* statistics			*/
+	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
+	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	__IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
+
+struct ifla_cacheinfo
+{
+	__u32	max_reasm_len;
+	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
+	__u32	reachable_time;
+	__u32	retrans_time;
+};
 
 #endif /* _LINUX_IF_H */
diff -puN /dev/null include/linux/if_addr.h
--- /dev/null
+++ a/include/linux/if_addr.h
@@ -0,0 +1,53 @@
+#ifndef __LINUX_IF_ADDR_H
+#define __LINUX_IF_ADDR_H
+
+#include <linux/netlink.h>
+
+struct ifaddrmsg
+{
+	__u8		ifa_family;
+	__u8		ifa_prefixlen;	/* The prefix length		*/
+	__u8		ifa_flags;	/* Flags			*/
+	__u8		ifa_scope;	/* Address scope		*/
+	__u32		ifa_index;	/* Link index			*/
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ */
+enum
+{
+	IFA_UNSPEC,
+	IFA_ADDRESS,
+	IFA_LOCAL,
+	IFA_LABEL,
+	IFA_BROADCAST,
+	IFA_ANYCAST,
+	IFA_CACHEINFO,
+	IFA_MULTICAST,
+	__IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY		0x01
+#define IFA_F_TEMPORARY		IFA_F_SECONDARY
+
+#define IFA_F_DEPRECATED	0x20
+#define IFA_F_TENTATIVE		0x40
+#define IFA_F_PERMANENT		0x80
+
+struct ifa_cacheinfo
+{
+	__u32	ifa_prefered;
+	__u32	ifa_valid;
+	__u32	cstamp; /* created timestamp, hundredths of seconds */
+	__u32	tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+#endif
diff -puN include/linux/ip.h~git-net include/linux/ip.h
--- a/include/linux/ip.h~git-net
+++ a/include/linux/ip.h
@@ -57,6 +57,7 @@
 #define IPOPT_SEC	(2 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_LSRR	(3 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_TIMESTAMP	(4 |IPOPT_MEASUREMENT)
+#define IPOPT_CIPSO	(6 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_RR	(7 |IPOPT_CONTROL)
 #define IPOPT_SID	(8 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_SSRR	(9 |IPOPT_CONTROL|IPOPT_COPY)
diff -puN /dev/null include/linux/neighbour.h
--- /dev/null
+++ a/include/linux/neighbour.h
@@ -0,0 +1,159 @@
+#ifndef __LINUX_NEIGHBOUR_H
+#define __LINUX_NEIGHBOUR_H
+
+#include <linux/netlink.h>
+
+struct ndmsg
+{
+	__u8		ndm_family;
+	__u8		ndm_pad1;
+	__u16		ndm_pad2;
+	__s32		ndm_ifindex;
+	__u16		ndm_state;
+	__u8		ndm_flags;
+	__u8		ndm_type;
+};
+
+enum
+{
+	NDA_UNSPEC,
+	NDA_DST,
+	NDA_LLADDR,
+	NDA_CACHEINFO,
+	NDA_PROBES,
+	__NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ *	Neighbor Cache Entry Flags
+ */
+
+#define NTF_PROXY	0x08	/* == ATF_PUBL */
+#define NTF_ROUTER	0x80
+
+/*
+ *	Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE	0x01
+#define NUD_REACHABLE	0x02
+#define NUD_STALE	0x04
+#define NUD_DELAY	0x08
+#define NUD_PROBE	0x10
+#define NUD_FAILED	0x20
+
+/* Dummy states */
+#define NUD_NOARP	0x40
+#define NUD_PERMANENT	0x80
+#define NUD_NONE	0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
+   and make no address resolution or NUD.
+   NUD_PERMANENT is also cannot be deleted by garbage collectors.
+ */
+
+struct nda_cacheinfo
+{
+	__u32		ndm_confirmed;
+	__u32		ndm_used;
+	__u32		ndm_updated;
+	__u32		ndm_refcnt;
+};
+
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,			/* char *, unchangeable */
+	NDTA_THRESH1,			/* u32 */
+	NDTA_THRESH2,			/* u32 */
+	NDTA_THRESH3,			/* u32 */
+	NDTA_CONFIG,			/* struct ndt_config, read-only */
+	NDTA_PARMS,			/* nested TLV NDTPA_* */
+	NDTA_STATS,			/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,		/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#endif
diff -puN include/linux/net.h~git-net include/linux/net.h
--- a/include/linux/net.h~git-net
+++ a/include/linux/net.h
@@ -169,11 +169,6 @@ struct proto_ops {
 struct net_proto_family {
 	int		family;
 	int		(*create)(struct socket *sock, int protocol);
-	/* These are counters for the number of different methods of
-	   each we support */
-	short		authentication;
-	short		encryption;
-	short		encrypt_net;
 	struct module	*owner;
 };
 
@@ -181,8 +176,8 @@ struct iovec;
 struct kvec;
 
 extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(struct net_proto_family *fam);
-extern int	     sock_unregister(int family);
+extern int	     sock_register(const struct net_proto_family *fam);
+extern void	     sock_unregister(int family);
 extern int	     sock_create(int family, int type, int proto,
 				 struct socket **res);
 extern int	     sock_create_kern(int family, int type, int proto,
@@ -208,6 +203,25 @@ extern int   	     kernel_recvmsg(struct
 				    struct kvec *vec, size_t num,
 				    size_t len, int flags);
 
+extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
+		       int addrlen);
+extern int kernel_listen(struct socket *sock, int backlog);
+extern int kernel_accept(struct socket *sock, struct socket **newsock,
+			 int flags);
+extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
+			  int addrlen, int flags);
+extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int *optlen);
+extern int kernel_setsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int optlen);
+extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+			   size_t size, int flags);
+extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+
 #ifndef CONFIG_SMP
 #define SOCKOPS_WRAPPED(name) name
 #define SOCKOPS_WRAP(name, fam)
diff -puN include/linux/netdevice.h~git-net include/linux/netdevice.h
--- a/include/linux/netdevice.h~git-net
+++ a/include/linux/netdevice.h
@@ -973,7 +973,7 @@ extern void		dev_mcast_init(void);
 extern int		netdev_max_backlog;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
-extern int skb_checksum_help(struct sk_buff *skb, int inward);
+extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
@@ -1009,7 +1009,7 @@ static inline int netif_needs_gso(struct
 {
 	return skb_is_gso(skb) &&
 	       (!skb_gso_ok(skb, dev->features) ||
-		unlikely(skb->ip_summed != CHECKSUM_HW));
+		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
 #endif /* __KERNEL__ */
diff -puN include/linux/netfilter.h~git-net include/linux/netfilter.h
--- a/include/linux/netfilter.h~git-net
+++ a/include/linux/netfilter.h
@@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf);
    Returns true or false. */
 extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
 
+extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval,
+				u_int32_t csum);
+extern u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+				      u_int32_t oldval, u_int32_t newval,
+				      u_int16_t csum, int pseudohdr);
+
 struct nf_afinfo {
 	unsigned short	family;
 	unsigned int	(*checksum)(struct sk_buff *skb, unsigned int hook,
diff -puN include/linux/netfilter_ipv4/ip_nat.h~git-net include/linux/netfilter_ipv4/ip_nat.h
--- a/include/linux/netfilter_ipv4/ip_nat.h~git-net
+++ a/include/linux/netfilter_ipv4/ip_nat.h
@@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(st
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
-/* Calculate relative checksum. */
-extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
-				    u_int32_t newval,
-				    u_int16_t oldcheck);
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff -puN include/linux/netfilter_ipv4/ip_nat_core.h~git-net include/linux/netfilter_ipv4/ip_nat_core.h
--- a/include/linux/netfilter_ipv4/ip_nat_core.h~git-net
+++ a/include/linux/netfilter_ipv4/ip_nat_core.h
@@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct
 			       unsigned int hooknum,
 			       struct sk_buff **pskb);
 
-extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-					 struct ip_conntrack *ct,
-					 enum ip_nat_manip_type manip,
-					 enum ip_conntrack_dir dir);
+extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+					 enum ip_conntrack_info ctinfo,
+					 unsigned int hooknum,
+					 struct sk_buff **pskb);
 #endif /* _IP_NAT_CORE_H */
diff -puN include/linux/rtnetlink.h~git-net include/linux/rtnetlink.h
--- a/include/linux/rtnetlink.h~git-net
+++ a/include/linux/rtnetlink.h
@@ -2,6 +2,7 @@
 #define __LINUX_RTNETLINK_H
 
 #include <linux/netlink.h>
+#include <linux/if.h>
 
 /****
  *		Routing/neighbour discovery messages.
@@ -238,10 +239,8 @@ enum rt_class_t
 	RT_TABLE_DEFAULT=253,
 	RT_TABLE_MAIN=254,
 	RT_TABLE_LOCAL=255,
-	__RT_TABLE_MAX
+	RT_TABLE_MAX=0xFFFFFFFF
 };
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
-
 
 
 /* Routing message attributes */
@@ -263,6 +262,7 @@ enum rtattr_type_t
 	RTA_CACHEINFO,
 	RTA_SESSION,
 	RTA_MP_ALGO,
+	RTA_TABLE,
 	__RTA_MAX
 };
 
@@ -383,226 +383,6 @@ struct rta_session
 	} u;
 };
 
-
-/*********************************************************
- *		Interface address.
- ****/
-
-struct ifaddrmsg
-{
-	unsigned char	ifa_family;
-	unsigned char	ifa_prefixlen;	/* The prefix length		*/
-	unsigned char	ifa_flags;	/* Flags			*/
-	unsigned char	ifa_scope;	/* See above			*/
-	int		ifa_index;	/* Link index			*/
-};
-
-enum
-{
-	IFA_UNSPEC,
-	IFA_ADDRESS,
-	IFA_LOCAL,
-	IFA_LABEL,
-	IFA_BROADCAST,
-	IFA_ANYCAST,
-	IFA_CACHEINFO,
-	IFA_MULTICAST,
-	__IFA_MAX
-};
-
-#define IFA_MAX (__IFA_MAX - 1)
-
-/* ifa_flags */
-
-#define IFA_F_SECONDARY		0x01
-#define IFA_F_TEMPORARY		IFA_F_SECONDARY
-
-#define IFA_F_DEPRECATED	0x20
-#define IFA_F_TENTATIVE		0x40
-#define IFA_F_PERMANENT		0x80
-
-struct ifa_cacheinfo
-{
-	__u32	ifa_prefered;
-	__u32	ifa_valid;
-	__u32	cstamp; /* created timestamp, hundredths of seconds */
-	__u32	tstamp; /* updated timestamp, hundredths of seconds */
-};
-
-
-#define IFA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
-#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
-
-/*
-   Important comment:
-   IFA_ADDRESS is prefix address, rather than local interface address.
-   It makes no difference for normally configured broadcast interfaces,
-   but for point-to-point IFA_ADDRESS is DESTINATION address,
-   local address is supplied in IFA_LOCAL attribute.
- */
-
-/**************************************************************
- *		Neighbour discovery.
- ****/
-
-struct ndmsg
-{
-	unsigned char	ndm_family;
-	unsigned char	ndm_pad1;
-	unsigned short	ndm_pad2;
-	int		ndm_ifindex;	/* Link index			*/
-	__u16		ndm_state;
-	__u8		ndm_flags;
-	__u8		ndm_type;
-};
-
-enum
-{
-	NDA_UNSPEC,
-	NDA_DST,
-	NDA_LLADDR,
-	NDA_CACHEINFO,
-	NDA_PROBES,
-	__NDA_MAX
-};
-
-#define NDA_MAX (__NDA_MAX - 1)
-
-#define NDA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
-#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-
-struct nda_cacheinfo
-{
-	__u32		ndm_confirmed;
-	__u32		ndm_used;
-	__u32		ndm_updated;
-	__u32		ndm_refcnt;
-};
-
-
-/*****************************************************************
- *		Neighbour tables specific messages.
- *
- * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
- * NLM_F_DUMP flag set. Every neighbour table configuration is
- * spread over multiple messages to avoid running into message
- * size limits on systems with many interfaces. The first message
- * in the sequence transports all not device specific data such as
- * statistics, configuration, and the default parameter set.
- * This message is followed by 0..n messages carrying device
- * specific parameter sets.
- * Although the ordering should be sufficient, NDTA_NAME can be
- * used to identify sequences. The initial message can be identified
- * by checking for NDTA_CONFIG. The device specific messages do
- * not contain this TLV but have NDTPA_IFINDEX set to the
- * corresponding interface index.
- *
- * To change neighbour table attributes, send RTM_SETNEIGHTBL
- * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
- * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
- * otherwise. Device specific parameter sets can be changed by
- * setting NDTPA_IFINDEX to the interface index of the corresponding
- * device.
- ****/
-
-struct ndt_stats
-{
-	__u64		ndts_allocs;
-	__u64		ndts_destroys;
-	__u64		ndts_hash_grows;
-	__u64		ndts_res_failed;
-	__u64		ndts_lookups;
-	__u64		ndts_hits;
-	__u64		ndts_rcv_probes_mcast;
-	__u64		ndts_rcv_probes_ucast;
-	__u64		ndts_periodic_gc_runs;
-	__u64		ndts_forced_gc_runs;
-};
-
-enum {
-	NDTPA_UNSPEC,
-	NDTPA_IFINDEX,			/* u32, unchangeable */
-	NDTPA_REFCNT,			/* u32, read-only */
-	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
-	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
-	NDTPA_RETRANS_TIME,		/* u64, msecs */
-	NDTPA_GC_STALETIME,		/* u64, msecs */
-	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
-	NDTPA_QUEUE_LEN,		/* u32 */
-	NDTPA_APP_PROBES,		/* u32 */
-	NDTPA_UCAST_PROBES,		/* u32 */
-	NDTPA_MCAST_PROBES,		/* u32 */
-	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_QLEN,		/* u32 */
-	NDTPA_LOCKTIME,			/* u64, msecs */
-	__NDTPA_MAX
-};
-#define NDTPA_MAX (__NDTPA_MAX - 1)
-
-struct ndtmsg
-{
-	__u8		ndtm_family;
-	__u8		ndtm_pad1;
-	__u16		ndtm_pad2;
-};
-
-struct ndt_config
-{
-	__u16		ndtc_key_len;
-	__u16		ndtc_entry_size;
-	__u32		ndtc_entries;
-	__u32		ndtc_last_flush;	/* delta to now in msecs */
-	__u32		ndtc_last_rand;		/* delta to now in msecs */
-	__u32		ndtc_hash_rnd;
-	__u32		ndtc_hash_mask;
-	__u32		ndtc_hash_chain_gc;
-	__u32		ndtc_proxy_qlen;
-};
-
-enum {
-	NDTA_UNSPEC,
-	NDTA_NAME,			/* char *, unchangeable */
-	NDTA_THRESH1,			/* u32 */
-	NDTA_THRESH2,			/* u32 */
-	NDTA_THRESH3,			/* u32 */
-	NDTA_CONFIG,			/* struct ndt_config, read-only */
-	NDTA_PARMS,			/* nested TLV NDTPA_* */
-	NDTA_STATS,			/* struct ndt_stats, read-only */
-	NDTA_GC_INTERVAL,		/* u64, msecs */
-	__NDTA_MAX
-};
-#define NDTA_MAX (__NDTA_MAX - 1)
-
-#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
-		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
-#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
-
-
 /****
  *		General form of address family dependent message.
  ****/
@@ -663,138 +443,6 @@ struct prefix_cacheinfo
 	__u32	valid_time;
 };
 
-/* The struct should be in sync with struct net_device_stats */
-struct rtnl_link_stats
-{
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
-	__u32	collisions;
-
-	/* detailed rx_errors: */
-	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	__u32	tx_aborted_errors;
-	__u32	tx_carrier_errors;
-	__u32	tx_fifo_errors;
-	__u32	tx_heartbeat_errors;
-	__u32	tx_window_errors;
-	
-	/* for cslip etc */
-	__u32	rx_compressed;
-	__u32	tx_compressed;
-};
-
-/* The struct should be in sync with struct ifmap */
-struct rtnl_link_ifmap
-{
-	__u64	mem_start;
-	__u64	mem_end;
-	__u64	base_addr;
-	__u16	irq;
-	__u8	dma;
-	__u8	port;
-};
-
-enum
-{
-	IFLA_UNSPEC,
-	IFLA_ADDRESS,
-	IFLA_BROADCAST,
-	IFLA_IFNAME,
-	IFLA_MTU,
-	IFLA_LINK,
-	IFLA_QDISC,
-	IFLA_STATS,
-	IFLA_COST,
-#define IFLA_COST IFLA_COST
-	IFLA_PRIORITY,
-#define IFLA_PRIORITY IFLA_PRIORITY
-	IFLA_MASTER,
-#define IFLA_MASTER IFLA_MASTER
-	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
-#define IFLA_WIRELESS IFLA_WIRELESS
-	IFLA_PROTINFO,		/* Protocol specific information for a link */
-#define IFLA_PROTINFO IFLA_PROTINFO
-	IFLA_TXQLEN,
-#define IFLA_TXQLEN IFLA_TXQLEN
-	IFLA_MAP,
-#define IFLA_MAP IFLA_MAP
-	IFLA_WEIGHT,
-#define IFLA_WEIGHT IFLA_WEIGHT
-	IFLA_OPERSTATE,
-	IFLA_LINKMODE,
-	__IFLA_MAX
-};
-
-
-#define IFLA_MAX (__IFLA_MAX - 1)
-
-#define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
-#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
-
-/* ifi_flags.
-
-   IFF_* flags.
-
-   The only change is:
-   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
-   more not changeable by user. They describe link media
-   characteristics and set by device driver.
-
-   Comments:
-   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
-   - If neither of these three flags are set;
-     the interface is NBMA.
-
-   - IFF_MULTICAST does not mean anything special:
-   multicasts can be used on all not-NBMA links.
-   IFF_MULTICAST means that this media uses special encapsulation
-   for multicast frames. Apparently, all IFF_POINTOPOINT and
-   IFF_BROADCAST devices are able to use multicasts too.
- */
-
-/* IFLA_LINK.
-   For usual devices it is equal ifi_index.
-   If it is a "virtual interface" (f.e. tunnel), ifi_link
-   can point to real physical interface (f.e. for bandwidth calculations),
-   or maybe 0, what means, that real media is unknown (usual
-   for IPIP tunnels, when route to endpoint is allowed to change)
- */
-
-/* Subtype attributes for IFLA_PROTINFO */
-enum
-{
-	IFLA_INET6_UNSPEC,
-	IFLA_INET6_FLAGS,	/* link flags			*/
-	IFLA_INET6_CONF,	/* sysctl parameters		*/
-	IFLA_INET6_STATS,	/* statistics			*/
-	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
-	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
-	__IFLA_INET6_MAX
-};
-
-#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
-
-struct ifla_cacheinfo
-{
-	__u32	max_reasm_len;
-	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
-	__u32	reachable_time;
-	__u32	retrans_time;
-};
 
 /*****************************************************************
  *		Traffic control messages.
@@ -885,10 +533,13 @@ enum rtnetlink_groups {
 	RTNLGRP_NOP2,
 	RTNLGRP_DECnet_ROUTE,
 #define RTNLGRP_DECnet_ROUTE	RTNLGRP_DECnet_ROUTE
-	RTNLGRP_NOP3,
+	RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE	RTNLGRP_DECnet_RULE
 	RTNLGRP_NOP4,
 	RTNLGRP_IPV6_PREFIX,
 #define RTNLGRP_IPV6_PREFIX	RTNLGRP_IPV6_PREFIX
+	RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE	RTNLGRP_IPV6_RULE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
@@ -923,8 +574,6 @@ extern int rtattr_parse(struct rtattr *t
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
-extern struct sock *rtnl;
-
 struct rtnetlink_link
 {
 	int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
@@ -933,6 +582,10 @@ struct rtnetlink_link
 
 extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		       struct nlmsghdr *nlh, gfp_t flags);
+extern void rtnl_set_sk_err(u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
@@ -1065,6 +718,13 @@ extern void __rtnl_unlock(void);
 	} \
 } while(0)
 
+static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
+{
+	return RTA_GET_U32(rta[RTA_TABLE-1]);
+rtattr_failure:
+	return table;
+}
+
 #endif /* __KERNEL__ */
 
 
diff -puN include/linux/security.h~git-net include/linux/security.h
--- a/include/linux/security.h~git-net
+++ a/include/linux/security.h
@@ -31,6 +31,8 @@
 #include <linux/msg.h>
 #include <linux/sched.h>
 #include <linux/key.h>
+#include <linux/xfrm.h>
+#include <net/flow.h>
 
 struct ctl_table;
 
@@ -88,6 +90,7 @@ extern int cap_netlink_recv(struct sk_bu
 struct nfsctl_arg;
 struct sched_param;
 struct swap_info_struct;
+struct request_sock;
 
 /* bprm_apply_creds unsafe reasons */
 #define LSM_UNSAFE_SHARE	1
@@ -812,9 +815,19 @@ struct swap_info_struct;
  *      which is used to copy security attributes between local stream sockets.
  * @sk_free_security:
  *	Deallocate security structure.
- * @sk_getsid:
- *	Retrieve the LSM-specific sid for the sock to enable caching of network
+ * @sk_clone_security:
+ *	Clone/copy security structure.
+ * @sk_getsecid:
+ *	Retrieve the LSM-specific secid for the sock to enable caching of network
  *	authorizations.
+ * @sock_graft:
+ *	Sets the socket's isec sid to the sock's sid.
+ * @inet_conn_request:
+ *	Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
+ * @inet_csk_clone:
+ *	Sets the new child socket's sid to the openreq sid.
+ * @req_classify_flow:
+ *	Sets the flow's sid to the openreq sid.
  *
  * Security hooks for XFRM operations.
  *
@@ -823,9 +836,10 @@ struct swap_info_struct;
  *	used by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level policy update program (e.g., setkey).
- *	Allocate a security structure to the xp->security field.
- *	The security field is initialized to NULL when the xfrm_policy is
- *	allocated.
+ *	@sk refers to the sock from which to derive the security context.
+ *	Allocate a security structure to the xp->security field; the security
+ *	field is initialized to NULL when the xfrm_policy is allocated. Only
+ *	one of sec_ctx or sock can be specified.
  *	Return 0 if operation was successful (memory to allocate, legal context)
  * @xfrm_policy_clone_security:
  *	@old contains an existing xfrm_policy in the SPD.
@@ -844,9 +858,14 @@ struct swap_info_struct;
  *	Database by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level SA generation program (e.g., setkey or racoon).
- *	Allocate a security structure to the x->security field.  The
- *	security field is initialized to NULL when the xfrm_state is
- *	allocated.
+ *	@polsec contains the security context information associated with a xfrm
+ *	policy rule from which to take the base context. polsec must be NULL
+ *	when sec_ctx is specified.
+ *	@secid contains the secid from which to take the mls portion of the context.
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to either sec_ctx or polsec, with the mls portion
+ *	taken from secid in the latter case.
  *	Return 0 if operation was successful (memory to allocate, legal context).
  * @xfrm_state_free_security:
  *	@x contains the xfrm_state.
@@ -857,13 +876,27 @@ struct swap_info_struct;
  * @xfrm_policy_lookup:
  *	@xp contains the xfrm_policy for which the access control is being
  *	checked.
- *	@sk_sid contains the sock security label that is used to authorize
+ *	@fl_secid contains the flow security label that is used to authorize
  *	access to the policy xp.
  *	@dir contains the direction of the flow (input or output).
- *	Check permission when a sock selects a xfrm_policy for processing
+ *	Check permission when a flow selects a xfrm_policy for processing
  *	XFRMs on a packet.  The hook is called when selecting either a
  *	per-socket policy or a generic xfrm policy.
  *	Return 0 if permission is granted.
+ * @xfrm_state_pol_flow_match:
+ *	@x contains the state to match.
+ *	@xp contains the policy to check for a match.
+ *	@fl contains the flow to check for a match.
+ *	Return 1 if there is a match.
+ * @xfrm_flow_state_match:
+ *	@fl contains the flow key to match.
+ *	@xfrm points to the xfrm_state to match.
+ *	Return 1 if there is a match.
+ * @xfrm_decode_session:
+ *	@skb points to skb to decode.
+ *	@secid points to the flow key secid to set.
+ *	@ckall says if all xfrms used should be checked for same secid.
+ *	Return 0 if ckall is zero or all xfrms used have the same secid.
  *
  * Security hooks affecting all Key Management operations
  *
@@ -1308,8 +1341,8 @@ struct security_operations {
 	int (*unix_may_send) (struct socket * sock, struct socket * other);
 
 	int (*socket_create) (int family, int type, int protocol, int kern);
-	void (*socket_post_create) (struct socket * sock, int family,
-				    int type, int protocol, int kern);
+	int (*socket_post_create) (struct socket * sock, int family,
+				   int type, int protocol, int kern);
 	int (*socket_bind) (struct socket * sock,
 			    struct sockaddr * address, int addrlen);
 	int (*socket_connect) (struct socket * sock,
@@ -1332,18 +1365,31 @@ struct security_operations {
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
-	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
+	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
+	void (*sk_getsecid) (struct sock *sk, u32 *secid);
+	void (*sock_graft)(struct sock* sk, struct socket *parent);
+	int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb,
+					struct request_sock *req);
+	void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req);
+	void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp,
+			struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 	int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new);
 	void (*xfrm_policy_free_security) (struct xfrm_policy *xp);
 	int (*xfrm_policy_delete_security) (struct xfrm_policy *xp);
-	int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_state_alloc_security) (struct xfrm_state *x,
+		struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec,
+		u32 secid);
 	void (*xfrm_state_free_security) (struct xfrm_state *x);
 	int (*xfrm_state_delete_security) (struct xfrm_state *x);
-	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+	int (*xfrm_state_pol_flow_match)(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+	int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm);
+	int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 	/* key management security hooks */
@@ -2778,13 +2824,13 @@ static inline int security_socket_create
 	return security_ops->socket_create(family, type, protocol, kern);
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
-	security_ops->socket_post_create(sock, family, type,
-					 protocol, kern);
+	return security_ops->socket_post_create(sock, family, type,
+						protocol, kern);
 }
 
 static inline int security_socket_bind(struct socket * sock, 
@@ -2885,9 +2931,36 @@ static inline void security_sk_free(stru
 	return security_ops->sk_free_security(sk);
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
+{
+	return security_ops->sk_clone_security(sk, newsk);
+}
+
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 {
-	return security_ops->sk_getsid(sk, fl, dir);
+	security_ops->sk_getsecid(sk, &fl->secid);
+}
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+	security_ops->req_classify_flow(req, fl);
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+	security_ops->sock_graft(sk, parent);
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
+{
+	return security_ops->inet_conn_request(sk, skb, req);
+}
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+	security_ops->inet_csk_clone(newsk, req);
 }
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket * sock,
@@ -2909,11 +2982,12 @@ static inline int security_socket_create
 	return 0;
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
+	return 0;
 }
 
 static inline int security_socket_bind(struct socket * sock, 
@@ -3011,16 +3085,43 @@ static inline void security_sk_free(stru
 {
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
+{
+}
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
 {
 	return 0;
 }
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
 {
-	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx);
+	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL);
+}
+
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return security_ops->xfrm_policy_alloc_security(xp, NULL, sk);
 }
 
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
@@ -3038,9 +3139,18 @@ static inline int security_xfrm_policy_d
 	return security_ops->xfrm_policy_delete_security(xp);
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+			struct xfrm_user_sec_ctx *sec_ctx)
 {
-	return security_ops->xfrm_state_alloc_security(x, sec_ctx);
+	return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0);
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+				struct xfrm_sec_ctx *polsec, u32 secid)
+{
+	if (!polsec)
+		return 0;
+	return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid);
 }
 
 static inline int security_xfrm_state_delete(struct xfrm_state *x)
@@ -3053,9 +3163,32 @@ static inline void security_xfrm_state_f
 	security_ops->xfrm_state_free_security(x);
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
+{
+	return security_ops->xfrm_policy_lookup(xp, fl_secid, dir);
+}
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
 {
-	return security_ops->xfrm_policy_lookup(xp, sk_sid, dir);
+	return security_ops->xfrm_state_pol_flow_match(x, xp, fl);
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return security_ops->xfrm_flow_state_match(fl, xfrm);
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
+{
+	return security_ops->xfrm_decode_session(skb, secid, 1);
+}
+
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
+{
+	int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0);
+
+	BUG_ON(rc);
 }
 #else	/* CONFIG_SECURITY_NETWORK_XFRM */
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
@@ -3063,6 +3196,11 @@ static inline int security_xfrm_policy_a
 	return 0;
 }
 
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return 0;
+}
+
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
 {
 	return 0;
@@ -3077,7 +3215,14 @@ static inline int security_xfrm_policy_d
 	return 0;
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+					struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+					struct xfrm_sec_ctx *polsec, u32 secid)
 {
 	return 0;
 }
@@ -3091,10 +3236,32 @@ static inline int security_xfrm_state_de
 	return 0;
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
+{
+	return 0;
+}
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl,
+                                struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
 {
 	return 0;
 }
+
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
+{
+}
+
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 #ifdef CONFIG_KEYS
diff -puN include/linux/skbuff.h~git-net include/linux/skbuff.h
--- a/include/linux/skbuff.h~git-net
+++ a/include/linux/skbuff.h
@@ -34,8 +34,9 @@
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
 
 #define CHECKSUM_NONE 0
-#define CHECKSUM_HW 1
+#define CHECKSUM_PARTIAL 1
 #define CHECKSUM_UNNECESSARY 2
+#define CHECKSUM_COMPLETE 3
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
@@ -56,17 +57,17 @@
  *	      Apparently with secret goal to sell you new device, when you
  *	      will add new protocol to your host. F.e. IPv6. 8)
  *
- *	HW: the most generic way. Device supplied checksum of _all_
+ *	COMPLETE: the most generic way. Device supplied checksum of _all_
  *	    the packet as seen by netif_rx in skb->csum.
  *	    NOTE: Even if device supports only some protocols, but
- *	    is able to produce some skb->csum, it MUST use HW,
+ *	    is able to produce some skb->csum, it MUST use COMPLETE,
  *	    not UNNECESSARY.
  *
  * B. Checksumming on output.
  *
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
- *	HW: device is required to csum packet as seen by hard_start_xmit
+ *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
  *	from skb->h.raw to the end and to record the checksum
  *	at skb->h.raw+skb->csum.
  *
@@ -1081,7 +1082,7 @@ static inline void __skb_queue_purge(str
  *	the headroom they think they need without accounting for the
  *	built in space. The built in space is used for optimisations.
  *
- *	%NULL is returned in there is no free memory.
+ *	%NULL is returned if there is no free memory.
  */
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 					      gfp_t gfp_mask)
@@ -1101,7 +1102,7 @@ static inline struct sk_buff *__dev_allo
  *	the headroom they think they need without accounting for the
  *	built in space. The built in space is used for optimisations.
  *
- *	%NULL is returned in there is no free memory. Although this function
+ *	%NULL is returned if there is no free memory. Although this function
  *	allocates memory it can be called from an interrupt.
  */
 static inline struct sk_buff *dev_alloc_skb(unsigned int length)
@@ -1246,14 +1247,14 @@ static inline int skb_linearize_cow(stru
  *	@len: length of data pulled
  *
  *	After doing a pull on a received packet, you need to call this to
- *	update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE
- *	so that it can be recomputed from scratch.
+ *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
+ *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
 
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
 }
 
@@ -1272,7 +1273,7 @@ static inline int pskb_trim_rcsum(struct
 {
 	if (likely(len >= skb->len))
 		return 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 	return __pskb_trim(skb, len);
 }
diff -puN include/linux/snmp.h~git-net include/linux/snmp.h
--- a/include/linux/snmp.h~git-net
+++ a/include/linux/snmp.h
@@ -155,6 +155,8 @@ enum
 	UDP_MIB_NOPORTS,			/* NoPorts */
 	UDP_MIB_INERRORS,			/* InErrors */
 	UDP_MIB_OUTDATAGRAMS,			/* OutDatagrams */
+	UDP_MIB_RCVBUFERRORS,			/* RcvbufErrors */
+	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
 	__UDP_MIB_MAX
 };
 
diff -puN include/linux/sysctl.h~git-net include/linux/sysctl.h
--- a/include/linux/sysctl.h~git-net
+++ a/include/linux/sysctl.h
@@ -411,6 +411,10 @@ enum
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
 	NET_TCP_DMA_COPYBREAK=116,
 	NET_TCP_SLOW_START_AFTER_IDLE=117,
+	NET_CIPSOV4_CACHE_ENABLE=118,
+	NET_CIPSOV4_CACHE_BUCKET_SIZE=119,
+	NET_CIPSOV4_RBM_OPTFMT=120,
+	NET_CIPSOV4_RBM_STRICTVALID=121,
 };
 
 enum {
diff -puN /dev/null include/net/cipso_ipv4.h
--- /dev/null
+++ a/include/net/cipso_ipv4.h
@@ -0,0 +1,250 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _CIPSO_IPV4_H
+#define _CIPSO_IPV4_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <net/netlabel.h>
+
+/* known doi values */
+#define CIPSO_V4_DOI_UNKNOWN          0x00000000
+
+/* tag types */
+#define CIPSO_V4_TAG_INVALID          0
+#define CIPSO_V4_TAG_RBITMAP          1
+#define CIPSO_V4_TAG_ENUM             2
+#define CIPSO_V4_TAG_RANGE            5
+#define CIPSO_V4_TAG_PBITMAP          6
+#define CIPSO_V4_TAG_FREEFORM         7
+
+/* doi mapping types */
+#define CIPSO_V4_MAP_UNKNOWN          0
+#define CIPSO_V4_MAP_STD              1
+#define CIPSO_V4_MAP_PASS             2
+
+/* limits */
+#define CIPSO_V4_MAX_REM_LVLS         256
+#define CIPSO_V4_INV_LVL              0x80000000
+#define CIPSO_V4_MAX_LOC_LVLS         (CIPSO_V4_INV_LVL - 1)
+#define CIPSO_V4_MAX_REM_CATS         65536
+#define CIPSO_V4_INV_CAT              0x80000000
+#define CIPSO_V4_MAX_LOC_CATS         (CIPSO_V4_INV_CAT - 1)
+
+/*
+ * CIPSO DOI definitions
+ */
+
+/* DOI definition struct */
+#define CIPSO_V4_TAG_MAXCNT           5
+struct cipso_v4_doi {
+	u32 doi;
+	u32 type;
+	union {
+		struct cipso_v4_std_map_tbl *std;
+	} map;
+	u8 tags[CIPSO_V4_TAG_MAXCNT];
+
+	u32 valid;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head dom_list;
+};
+
+/* Standard CIPSO mapping table */
+/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the
+ *       bit is set then consider that value as unspecified, meaning the
+ *       mapping for that particular level/category is invalid */
+struct cipso_v4_std_map_tbl {
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} lvl;
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} cat;
+};
+
+/*
+ * Sysctl Variables
+ */
+
+#ifdef CONFIG_NETLABEL
+extern int cipso_v4_cache_enabled;
+extern int cipso_v4_cache_bucketsize;
+extern int cipso_v4_rbm_optfmt;
+extern int cipso_v4_rbm_strictvalid;
+#endif
+
+/*
+ * Helper Functions
+ */
+
+#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)
+#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso)
+
+/*
+ * DOI List Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom);
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom);
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain);
+#else
+static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_remove(u32 doi,
+				    void (*callback) (struct rcu_head * head))
+{
+	return 0;
+}
+
+static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	return NULL;
+}
+
+static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
+					  const char *domain)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+					     const char *domain)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_cache_invalidate(void);
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void cipso_v4_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int cipso_v4_cache_add(const struct sk_buff *skb,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Protocol Handling Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
+int cipso_v4_socket_setopt(struct socket *sock,
+			   unsigned char *opt,
+			   u32 opt_len);
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_socket_getopt(const struct socket *sock,
+			   unsigned char **opt,
+			   u32 *opt_len);
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_validate(unsigned char **option);
+#else
+static inline void cipso_v4_error(struct sk_buff *skb,
+				  int error,
+				  u32 gateway)
+{
+	return;
+}
+
+static inline int cipso_v4_socket_setattr(const struct socket *sock,
+				  const struct cipso_v4_doi *doi_def,
+				  const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_socket_getattr(const struct socket *sock,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_validate(unsigned char **option)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _CIPSO_IPV4_H */
diff -puN include/net/dn_fib.h~git-net include/net/dn_fib.h
--- a/include/net/dn_fib.h~git-net
+++ a/include/net/dn_fib.h
@@ -22,7 +22,7 @@ struct dn_kern_rta
 };
 
 struct dn_fib_res {
-	struct dn_fib_rule *r;
+	struct fib_rule *r;
 	struct dn_fib_info *fi;
 	unsigned char prefixlen;
 	unsigned char nh_sel;
@@ -94,7 +94,8 @@ struct dn_fib_node {
 
 
 struct dn_fib_table {
-	int n;
+	struct hlist_node hlist;
+	u32 n;
 
 	int (*insert)(struct dn_fib_table *t, struct rtmsg *r, 
 			struct dn_kern_rta *rta, struct nlmsghdr *n, 
@@ -130,14 +131,11 @@ extern __le16 dn_fib_get_attr16(struct r
 extern void dn_fib_flush(void);
 extern void dn_fib_select_multipath(const struct flowi *fl,
 					struct dn_fib_res *res);
-extern int dn_fib_sync_down(__le16 local, struct net_device *dev,
-				int force);
-extern int dn_fib_sync_up(struct net_device *dev);
 
 /*
  * dn_tables.c
  */
-extern struct dn_fib_table *dn_fib_get_table(int n, int creat);
+extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
 extern struct dn_fib_table *dn_fib_empty_table(void);
 extern void dn_fib_table_init(void);
 extern void dn_fib_table_cleanup(void);
@@ -147,10 +145,8 @@ extern void dn_fib_table_cleanup(void);
  */
 extern void dn_fib_rules_init(void);
 extern void dn_fib_rules_cleanup(void);
-extern void dn_fib_rule_put(struct dn_fib_rule *);
-extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags);
 extern unsigned dnet_addr_type(__le16 addr);
-extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res);
+extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res);
 
 /*
  * rtnetlink interface
@@ -176,11 +172,9 @@ static inline void dn_fib_res_put(struct
 	if (res->fi)
 		dn_fib_info_put(res->fi);
 	if (res->r)
-		dn_fib_rule_put(res->r);
+		fib_rule_put(res->r);
 }
 
-extern struct dn_fib_table *dn_fib_tables[];
-
 #else /* Endnode */
 
 #define dn_fib_init()  do { } while(0)
diff -puN /dev/null include/net/fib_rules.h
--- /dev/null
+++ a/include/net/fib_rules.h
@@ -0,0 +1,97 @@
+#ifndef __NET_FIB_RULES_H
+#define __NET_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/fib_rules.h>
+#include <net/flow.h>
+#include <net/netlink.h>
+
+struct fib_rule
+{
+	struct list_head	list;
+	atomic_t		refcnt;
+	int			ifindex;
+	char			ifname[IFNAMSIZ];
+	u32			pref;
+	u32			flags;
+	u32			table;
+	u8			action;
+	struct rcu_head		rcu;
+};
+
+struct fib_lookup_arg
+{
+	void			*lookup_ptr;
+	void			*result;
+	struct fib_rule		*rule;
+};
+
+struct fib_rules_ops
+{
+	int			family;
+	struct list_head	list;
+	int			rule_size;
+
+	int			(*action)(struct fib_rule *,
+					  struct flowi *, int,
+					  struct fib_lookup_arg *);
+	int			(*match)(struct fib_rule *,
+					 struct flowi *, int);
+	int			(*configure)(struct fib_rule *,
+					     struct sk_buff *,
+					     struct nlmsghdr *,
+					     struct fib_rule_hdr *,
+					     struct nlattr **);
+	int			(*compare)(struct fib_rule *,
+					   struct fib_rule_hdr *,
+					   struct nlattr **);
+	int			(*fill)(struct fib_rule *, struct sk_buff *,
+					struct nlmsghdr *,
+					struct fib_rule_hdr *);
+	u32			(*default_pref)(void);
+
+	int			nlgroup;
+	struct nla_policy	*policy;
+	struct list_head	*rules_list;
+	struct module		*owner;
+};
+
+static inline void fib_rule_get(struct fib_rule *rule)
+{
+	atomic_inc(&rule->refcnt);
+}
+
+static inline void fib_rule_put_rcu(struct rcu_head *head)
+{
+	struct fib_rule *rule = container_of(head, struct fib_rule, rcu);
+	kfree(rule);
+}
+
+static inline void fib_rule_put(struct fib_rule *rule)
+{
+	if (atomic_dec_and_test(&rule->refcnt))
+		call_rcu(&rule->rcu, fib_rule_put_rcu);
+}
+
+static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
+{
+	if (nla[FRA_TABLE])
+		return nla_get_u32(nla[FRA_TABLE]);
+	return frh->table;
+}
+
+extern int			fib_rules_register(struct fib_rules_ops *);
+extern int			fib_rules_unregister(struct fib_rules_ops *);
+
+extern int			fib_rules_lookup(struct fib_rules_ops *,
+						 struct flowi *, int flags,
+						 struct fib_lookup_arg *);
+
+extern int			fib_nl_newrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_nl_delrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_rules_dump(struct sk_buff *,
+					       struct netlink_callback *, int);
+#endif
diff -puN include/net/flow.h~git-net include/net/flow.h
--- a/include/net/flow.h~git-net
+++ a/include/net/flow.h
@@ -78,6 +78,7 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
@@ -85,10 +86,10 @@ struct flowi {
 #define FLOW_DIR_FWD	2
 
 struct sock;
-typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp);
 
-extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 	 		       flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
diff -puN include/net/genetlink.h~git-net include/net/genetlink.h
--- a/include/net/genetlink.h~git-net
+++ a/include/net/genetlink.h
@@ -133,11 +133,12 @@ static inline int genlmsg_cancel(struct 
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
-				    unsigned int group)
+				    unsigned int group, gfp_t flags)
 {
-	return nlmsg_multicast(genl_sock, skb, pid, group);
+	return nlmsg_multicast(genl_sock, skb, pid, group, flags);
 }
 
 /**
diff -puN include/net/inet_hashtables.h~git-net include/net/inet_hashtables.h
--- a/include/net/inet_hashtables.h~git-net
+++ a/include/net/inet_hashtables.h
@@ -271,38 +271,15 @@ static inline int inet_iif(const struct 
 	return ((struct rtable *)skb->dst)->rt_iif;
 }
 
-extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
+extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
 					   const u32 daddr,
 					   const unsigned short hnum,
 					   const int dif);
 
-/* Optimize the common listener case. */
-static inline struct sock *
-		inet_lookup_listener(struct inet_hashinfo *hashinfo,
-				     const u32 daddr,
-				     const unsigned short hnum, const int dif)
+static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+						u32 daddr, u16 dport, int dif)
 {
-	struct sock *sk = NULL;
-	const struct hlist_head *head;
-
-	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
-	if (!hlist_empty(head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-		if (inet->num == hnum && !sk->sk_node.next &&
-		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-		    !sk->sk_bound_dev_if)
-			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
-	}
-	if (sk) {
-sherry_cache:
-		sock_hold(sk);
-	}
-	read_unlock(&hashinfo->lhash_lock);
-	return sk;
+	return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif);
 }
 
 /* Socket demux engine toys. */
@@ -391,14 +368,25 @@ hit:
 	goto out;
 }
 
+static inline struct sock *
+	inet_lookup_established(struct inet_hashinfo *hashinfo,
+				const u32 saddr, const u16 sport,
+				const u32 daddr, const u16 dport,
+				const int dif)
+{
+	return __inet_lookup_established(hashinfo, saddr, sport, daddr,
+					 ntohs(dport), dif);
+}
+
 static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
 					 const u32 saddr, const u16 sport,
-					 const u32 daddr, const u16 hnum,
+					 const u32 daddr, const u16 dport,
 					 const int dif)
 {
+	u16 hnum = ntohs(dport);
 	struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
 						    hnum, dif);
-	return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
+	return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
@@ -409,7 +397,7 @@ static inline struct sock *inet_lookup(s
 	struct sock *sk;
 
 	local_bh_disable();
-	sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+	sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif);
 	local_bh_enable();
 
 	return sk;
diff -puN include/net/inet_sock.h~git-net include/net/inet_sock.h
--- a/include/net/inet_sock.h~git-net
+++ a/include/net/inet_sock.h
@@ -27,7 +27,6 @@
 /** struct ip_options - IP Options
  *
  * @faddr - Saved first hop address
- * @is_setbyuser - Set by setsockopt?
  * @is_data - Options in __data, rather than skb
  * @is_strictroute - Strict source route
  * @srr_is_hit - Packet destination addr was our one
@@ -42,8 +41,7 @@ struct ip_options {
 	unsigned char	srr;
 	unsigned char	rr;
 	unsigned char	ts;
-	unsigned char	is_setbyuser:1,
-			is_data:1,
+	unsigned char	is_data:1,
 			is_strictroute:1,
 			srr_is_hit:1,
 			is_changed:1,
@@ -51,7 +49,7 @@ struct ip_options {
 			ts_needtime:1,
 			ts_needaddr:1;
 	unsigned char	router_alert;
-	unsigned char	__pad1;
+	unsigned char	cipso;
 	unsigned char	__pad2;
 	unsigned char	__data[0];
 };
diff -puN include/net/ip6_fib.h~git-net include/net/ip6_fib.h
--- a/include/net/ip6_fib.h~git-net
+++ a/include/net/ip6_fib.h
@@ -51,6 +51,8 @@ struct rt6key
 	int		plen;
 };
 
+struct fib6_table;
+
 struct rt6_info
 {
 	union {
@@ -71,6 +73,7 @@ struct rt6_info
 	u32				rt6i_flags;
 	u32				rt6i_metric;
 	atomic_t			rt6i_ref;
+	struct fib6_table		*rt6i_table;
 
 	struct rt6key			rt6i_dst;
 	struct rt6key			rt6i_src;
@@ -89,28 +92,6 @@ struct fib6_walker_t
 	void *args;
 };
 
-extern struct fib6_walker_t fib6_walker_list;
-extern rwlock_t fib6_walker_lock;
-
-static inline void fib6_walker_link(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next = fib6_walker_list.next;
-	w->prev = &fib6_walker_list;
-	w->next->prev = w;
-	w->prev->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
-static inline void fib6_walker_unlink(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next->prev = w->prev;
-	w->prev->next = w->next;
-	w->prev = w->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
 struct rt6_statistics {
 	__u32		fib_nodes;
 	__u32		fib_route_nodes;
@@ -143,12 +124,44 @@ struct rt6_statistics {
 
 typedef void			(*f_pnode)(struct fib6_node *fn, void *);
 
-extern struct fib6_node		ip6_routing_table;
+struct fib6_table {
+	struct hlist_node	tb6_hlist;
+	u32			tb6_id;
+	rwlock_t		tb6_lock;
+	struct fib6_node	tb6_root;
+};
+
+#define RT6_TABLE_UNSPEC	RT_TABLE_UNSPEC
+#define RT6_TABLE_MAIN		RT_TABLE_MAIN
+#define RT6_TABLE_DFLT		RT6_TABLE_MAIN
+#define RT6_TABLE_INFO		RT6_TABLE_MAIN
+#define RT6_TABLE_PREFIX	RT6_TABLE_MAIN
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB6_TABLE_MIN		1
+#define FIB6_TABLE_MAX		RT_TABLE_MAX
+#define RT6_TABLE_LOCAL		RT_TABLE_LOCAL
+#else
+#define FIB6_TABLE_MIN		RT_TABLE_MAIN
+#define FIB6_TABLE_MAX		FIB6_TABLE_MIN
+#define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
+#endif
+
+#define RT6_F_STRICT		1
+#define RT6_F_HAS_SADDR		2
+
+typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
+					 struct flowi *, int);
 
 /*
  *	exported functions
  */
 
+extern struct fib6_table *	fib6_get_table(u32 id);
+extern struct fib6_table *	fib6_new_table(u32 id);
+extern struct dst_entry *	fib6_rule_lookup(struct flowi *fl, int flags,
+						 pol_lookup_t lookup);
+
 extern struct fib6_node		*fib6_lookup(struct fib6_node *root,
 					     struct in6_addr *daddr,
 					     struct in6_addr *saddr);
@@ -157,12 +170,8 @@ struct fib6_node		*fib6_locate(struct fi
 					     struct in6_addr *daddr, int dst_len,
 					     struct in6_addr *saddr, int src_len);
 
-extern void			fib6_clean_tree(struct fib6_node *root,
-						int (*func)(struct rt6_info *, void *arg),
-						int prune, void *arg);
-
-extern int			fib6_walk(struct fib6_walker_t *w);
-extern int			fib6_walk_continue(struct fib6_walker_t *w);
+extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+					       int prune, void *arg);
 
 extern int			fib6_add(struct fib6_node *root,
 					 struct rt6_info *rt,
@@ -184,5 +193,11 @@ extern void			fib6_run_gc(unsigned long 
 extern void			fib6_gc_cleanup(void);
 
 extern void			fib6_init(void);
+
+extern void			fib6_rules_init(void);
+extern void			fib6_rules_cleanup(void);
+extern int			fib6_rules_dump(struct sk_buff *,
+						struct netlink_callback *);
+
 #endif
 #endif
diff -puN include/net/ip6_route.h~git-net include/net/ip6_route.h
--- a/include/net/ip6_route.h~git-net
+++ a/include/net/ip6_route.h
@@ -41,6 +41,11 @@ struct pol_chain {
 
 extern struct rt6_info	ip6_null_entry;
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+extern struct rt6_info	ip6_prohibit_entry;
+extern struct rt6_info	ip6_blk_hole_entry;
+#endif
+
 extern int ip6_rt_gc_interval;
 
 extern void			ip6_route_input(struct sk_buff *skb);
@@ -58,7 +63,8 @@ extern int			ipv6_route_ioctl(unsigned i
 extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
 					      struct nlmsghdr *,
 					      void *rtattr,
-					      struct netlink_skb_parms *req);
+					      struct netlink_skb_parms *req,
+					      u32 table_id);
 extern int			ip6_ins_rt(struct rt6_info *,
 					   struct nlmsghdr *,
 					   void *rtattr,
@@ -131,6 +137,13 @@ extern int inet6_rtm_newroute(struct sk_
 extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 
+struct rt6_rtnl_dump_arg
+{
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
 extern void rt6_ifdown(struct net_device *dev);
 extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
 
diff -puN include/net/ip_fib.h~git-net include/net/ip_fib.h
--- a/include/net/ip_fib.h~git-net
+++ a/include/net/ip_fib.h
@@ -18,26 +18,34 @@
 
 #include <net/flow.h>
 #include <linux/seq_file.h>
+#include <net/fib_rules.h>
 
-/* WARNING: The ordering of these elements must match ordering
- *          of RTA_* rtnetlink attribute numbers.
- */
-struct kern_rta {
-	void		*rta_dst;
-	void		*rta_src;
-	int		*rta_iif;
-	int		*rta_oif;
-	void		*rta_gw;
-	u32		*rta_priority;
-	void		*rta_prefsrc;
-	struct rtattr	*rta_mx;
-	struct rtattr	*rta_mp;
-	unsigned char	*rta_protoinfo;
-	u32		*rta_flow;
-	struct rta_cacheinfo *rta_ci;
-	struct rta_session *rta_sess;
-	u32		*rta_mp_alg;
-};
+struct fib_config {
+	u8			fc_family;
+	u8			fc_dst_len;
+	u8			fc_src_len;
+	u8			fc_tos;
+	u8			fc_protocol;
+	u8			fc_scope;
+	u8			fc_type;
+	/* 1 byte unused */
+	u32			fc_table;
+	u32			fc_dst;
+	u32			fc_src;
+	u32			fc_gw;
+	int			fc_oif;
+	u32			fc_flags;
+	u32			fc_priority;
+	u32			fc_prefsrc;
+	struct nlattr		*fc_mx;
+	struct rtnexthop	*fc_mp;
+	int			fc_mx_len;
+	int			fc_mp_len;
+	u32			fc_flow;
+	u32			fc_mp_alg;
+	u32			fc_nlflags;
+	struct nl_info		fc_nlinfo;
+ };
 
 struct fib_info;
 
@@ -149,15 +157,12 @@ struct fib_result_nl {
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
 
 struct fib_table {
-	unsigned char	tb_id;
+	struct hlist_node tb_hlist;
+	u32		tb_id;
 	unsigned	tb_stamp;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
-	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
-	int		(*tb_delete)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
+	int		(*tb_insert)(struct fib_table *, struct fib_config *);
+	int		(*tb_delete)(struct fib_table *, struct fib_config *);
 	int		(*tb_dump)(struct fib_table *table, struct sk_buff *skb,
 				     struct netlink_callback *cb);
 	int		(*tb_flush)(struct fib_table *table);
@@ -172,14 +177,14 @@ struct fib_table {
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
 
-static inline struct fib_table *fib_get_table(int id)
+static inline struct fib_table *fib_get_table(u32 id)
 {
 	if (id != RT_TABLE_LOCAL)
 		return ip_fib_main_table;
 	return ip_fib_local_table;
 }
 
-static inline struct fib_table *fib_new_table(int id)
+static inline struct fib_table *fib_new_table(u32 id)
 {
 	return fib_get_table(id);
 }
@@ -199,35 +204,19 @@ static inline void fib_select_default(co
 }
 
 #else /* CONFIG_IP_MULTIPLE_TABLES */
-#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
-#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
-
-extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
-extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
-extern struct fib_table *__fib_new_table(int id);
-extern void fib_rule_put(struct fib_rule *r);
-
-static inline struct fib_table *fib_get_table(int id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
-
-	return fib_tables[id];
-}
-
-static inline struct fib_table *fib_new_table(int id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
+#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
+#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
 
-	return fib_tables[id] ? : __fib_new_table(id);
-}
+extern int fib_lookup(struct flowi *flp, struct fib_result *res);
 
+extern struct fib_table *fib_new_table(u32 id);
+extern struct fib_table *fib_get_table(u32 id);
 extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
+extern struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
 extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
@@ -243,23 +232,20 @@ struct rtentry;
 extern int ip_fib_check_default(u32 gw, struct net_device *dev);
 extern int fib_sync_down(u32 local, struct net_device *dev, int force);
 extern int fib_sync_up(struct net_device *dev);
-extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-			       struct kern_rta *rta, struct rtentry *r);
 extern u32  __fib_res_prefsrc(struct fib_result *res);
 
 /* Exported by fib_hash.c */
-extern struct fib_table *fib_hash_init(int id);
+extern struct fib_table *fib_hash_init(u32 id);
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-/* Exported by fib_rules.c */
+extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+extern void __init fib4_rules_init(void);
 
-extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
 #ifdef CONFIG_NET_CLS_ROUTE
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
-extern void fib_rules_init(void);
+
 #endif
 
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
diff -puN include/net/neighbour.h~git-net include/net/neighbour.h
--- a/include/net/neighbour.h~git-net
+++ a/include/net/neighbour.h
@@ -1,6 +1,8 @@
 #ifndef _NET_NEIGHBOUR_H
 #define _NET_NEIGHBOUR_H
 
+#include <linux/neighbour.h>
+
 /*
  *	Generic neighbour manipulation
  *
@@ -14,40 +16,6 @@
  *		- Add neighbour cache statistics like rtstat
  */
 
-/* The following flags & states are exported to user space,
-   so that they should be moved to include/linux/ directory.
- */
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
-   and make no address resolution or NUD.
-   NUD_PERMANENT is also cannot be deleted by garbage collectors.
- */
-
-#ifdef __KERNEL__
-
 #include <asm/atomic.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -133,7 +101,7 @@ struct neighbour
 	__u8			dead;
 	atomic_t		probes;
 	rwlock_t		lock;
-	unsigned char		ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];
+	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
 	struct hh_cache		*hh;
 	atomic_t		refcnt;
 	int			(*output)(struct sk_buff *skb);
@@ -374,6 +342,3 @@ struct neighbour_cb {
 #define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)
 
 #endif
-#endif
-
-
diff -puN /dev/null include/net/netlabel.h
--- /dev/null
+++ a/include/net/netlabel.h
@@ -0,0 +1,291 @@
+/*
+ * NetLabel System
+ *
+ * The NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_H
+#define _NETLABEL_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+
+/*
+ * NetLabel - A management interface for maintaining network packet label
+ *            mapping tables for explicit packet labling protocols.
+ *
+ * Network protocols such as CIPSO and RIPSO require a label translation layer
+ * to convert the label on the packet into something meaningful on the host
+ * machine.  In the current Linux implementation these mapping tables live
+ * inside the kernel; NetLabel provides a mechanism for user space applications
+ * to manage these mapping tables.
+ *
+ * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to
+ * send messages between kernel and user space.  The general format of a
+ * NetLabel message is shown below:
+ *
+ *  +-----------------+-------------------+--------- --- -- -
+ *  | struct nlmsghdr | struct genlmsghdr | payload
+ *  +-----------------+-------------------+--------- --- -- -
+ *
+ * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal.
+ * The payload is dependent on the subsystem specified in the
+ * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions
+ * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c
+ * file.  All of the fields in the NetLabel payload are NETLINK attributes, the
+ * length of each field is the length of the NETLINK attribute payload, see
+ * include/net/netlink.h for more information on NETLINK attributes.
+ *
+ */
+
+/*
+ * NetLabel NETLINK protocol
+ */
+
+#define NETLBL_PROTO_VERSION            1
+
+/* NetLabel NETLINK types/families */
+#define NETLBL_NLTYPE_NONE              0
+#define NETLBL_NLTYPE_MGMT              1
+#define NETLBL_NLTYPE_MGMT_NAME         "NLBL_MGMT"
+#define NETLBL_NLTYPE_RIPSO             2
+#define NETLBL_NLTYPE_RIPSO_NAME        "NLBL_RIPSO"
+#define NETLBL_NLTYPE_CIPSOV4           3
+#define NETLBL_NLTYPE_CIPSOV4_NAME      "NLBL_CIPSOv4"
+#define NETLBL_NLTYPE_CIPSOV6           4
+#define NETLBL_NLTYPE_CIPSOV6_NAME      "NLBL_CIPSOv6"
+#define NETLBL_NLTYPE_UNLABELED         5
+#define NETLBL_NLTYPE_UNLABELED_NAME    "NLBL_UNLBL"
+
+/* NetLabel return codes */
+#define NETLBL_E_OK                     0
+
+/*
+ * Helper functions
+ */
+
+#define NETLBL_LEN_U8                   nla_total_size(sizeof(u8))
+#define NETLBL_LEN_U16                  nla_total_size(sizeof(u16))
+#define NETLBL_LEN_U32                  nla_total_size(sizeof(u32))
+
+/**
+ * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer
+ * @head: the amount of headroom in bytes
+ * @body: the desired size (minus headroom) in bytes
+ * @gfp_flags: the alloc flags to pass to alloc_skb()
+ *
+ * Description:
+ * Allocate a NETLINK message buffer based on the sizes given in @head and
+ * @body.  If @head is greater than zero skb_reserve() is called to reserve
+ * @head bytes at the start of the buffer.  Returns a valid sk_buff pointer on
+ * success, NULL on failure.
+ *
+ */
+static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head,
+						       size_t body,
+						       int gfp_flags)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags);
+	if (skb == NULL)
+		return NULL;
+	if (head > 0) {
+		skb_reserve(skb, head);
+		if (skb_tailroom(skb) < body) {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+
+	return skb;
+}
+
+/*
+ * NetLabel - Kernel API for accessing the network packet label mappings.
+ *
+ * The following functions are provided for use by other kernel modules,
+ * specifically kernel LSM modules, to provide a consistent, transparent API
+ * for dealing with explicit packet labeling protocols such as CIPSO and
+ * RIPSO.  The functions defined here are implemented in the
+ * net/netlabel/netlabel_kapi.c file.
+ *
+ */
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map;
+
+/* Domain mapping operations */
+int netlbl_domhsh_remove(const char *domain);
+
+/* LSM security attributes */
+struct netlbl_lsm_cache {
+	void (*free) (const void *data);
+	void *data;
+};
+struct netlbl_lsm_secattr {
+	char *domain;
+
+	u32 mls_lvl;
+	u32 mls_lvl_vld;
+	unsigned char *mls_cat;
+	size_t mls_cat_len;
+
+	struct netlbl_lsm_cache cache;
+};
+
+/*
+ * LSM security attribute operations
+ */
+
+
+/**
+ * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct
+ * @secattr: the struct to initialize
+ *
+ * Description:
+ * Initialize an already allocated netlbl_lsm_secattr struct.  Returns zero on
+ * success, negative values on error.
+ *
+ */
+static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr)
+{
+	memset(secattr, 0, sizeof(*secattr));
+	return 0;
+}
+
+/**
+ * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct
+ * @secattr: the struct to clear
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Destroys the @secattr struct, including freeing all of the internal buffers.
+ * If @clear_cache is true then free the cache fields, otherwise leave them
+ * intact.  The struct must be reset with a call to netlbl_secattr_init()
+ * before reuse.
+ *
+ */
+static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr,
+					  u32 clear_cache)
+{
+	if (clear_cache && secattr->cache.data != NULL && secattr->cache.free)
+		secattr->cache.free(secattr->cache.data);
+	kfree(secattr->domain);
+	kfree(secattr->mls_cat);
+}
+
+/**
+ * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct
+ * @flags: the memory allocation flags
+ *
+ * Description:
+ * Allocate and initialize a netlbl_lsm_secattr struct.  Returns a valid
+ * pointer on success, or NULL on failure.
+ *
+ */
+static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags)
+{
+	return kzalloc(sizeof(struct netlbl_lsm_secattr), flags);
+}
+
+/**
+ * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct
+ * @secattr: the struct to free
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Frees @secattr including all of the internal buffers.  If @clear_cache is
+ * true then free the cache fields, otherwise leave them intact.
+ *
+ */
+static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr,
+				       u32 clear_cache)
+{
+	netlbl_secattr_destroy(secattr, clear_cache);
+	kfree(secattr);
+}
+
+/*
+ * LSM protocol operations
+ */
+
+#ifdef CONFIG_NETLABEL
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr);
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr);
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr);
+void netlbl_skbuff_err(struct sk_buff *skb, int error);
+#else
+static inline int netlbl_socket_setattr(const struct socket *sock,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_socket_getattr(const struct socket *sock,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	return;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * LSM label mapping cache operations
+ */
+
+#ifdef CONFIG_NETLABEL
+void netlbl_cache_invalidate(void);
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int netlbl_cache_add(const struct sk_buff *skb,
+				   const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _NETLABEL_H */
diff -puN include/net/netlink.h~git-net include/net/netlink.h
--- a/include/net/netlink.h~git-net
+++ a/include/net/netlink.h
@@ -35,12 +35,15 @@
  *   nlmsg_put()			add a netlink message to an skb
  *   nlmsg_put_answer()			callback based nlmsg_put()
  *   nlmsg_end()			finanlize netlink message
+ *   nlmsg_get_pos()			return current position in message
+ *   nlmsg_trim()			trim part of message
  *   nlmsg_cancel()			cancel message construction
  *   nlmsg_free()			free a netlink message
  *
  * Message Sending:
  *   nlmsg_multicast()			multicast message to several groups
  *   nlmsg_unicast()			unicast a message to a single socket
+ *   nlmsg_notify()			send notification message
  *
  * Message Length Calculations:
  *   nlmsg_msg_size(payload)		length of message w/o padding
@@ -62,6 +65,9 @@
  *   nlmsg_validate()			validate netlink message incl. attrs
  *   nlmsg_for_each_attr()		loop over all attributes
  *
+ * Misc:
+ *   nlmsg_report()			report back to application?
+ *
  * ------------------------------------------------------------------------
  *                          Attributes Interface
  * ------------------------------------------------------------------------
@@ -80,8 +86,10 @@
  *   struct nlattr			netlink attribtue header
  *
  * Attribute Construction:
- *   nla_reserve(skb, type, len)	reserve skb tailroom for an attribute
+ *   nla_reserve(skb, type, len)	reserve room for an attribute
+ *   nla_reserve_nohdr(skb, len)	reserve room for an attribute w/o hdr
  *   nla_put(skb, type, len, data)	add attribute to skb
+ *   nla_put_nohdr(skb, len, data)	add attribute w/o hdr
  *
  * Attribute Construction for Basic Types:
  *   nla_put_u8(skb, type, value)	add u8 attribute to skb
@@ -139,6 +147,7 @@
  *   nla_next(nla, remaining)		get next netlink attribute
  *   nla_validate()			validate a stream of attributes
  *   nla_find()				find attribute in stream of attributes
+ *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
  *   nla_parse_nested()			parse nested attribuets
  *   nla_for_each_attr()		loop over all attributes
@@ -183,11 +192,24 @@ struct nla_policy {
 	u16		minlen;
 };
 
+/**
+ * struct nl_info - netlink source information
+ * @nlh: Netlink message header of original request
+ * @pid: Netlink PID of requesting application
+ */
+struct nl_info {
+	struct nlmsghdr		*nlh;
+	u32			pid;
+};
+
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
 					  int (*cb)(struct sk_buff *,
 						    struct nlmsghdr *, int *));
 extern void		netlink_queue_skip(struct nlmsghdr *nlh,
 					   struct sk_buff *skb);
+extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
+				     u32 pid, unsigned int group, int report,
+				     gfp_t flags);
 
 extern int		nla_validate(struct nlattr *head, int len, int maxtype,
 				     struct nla_policy *policy);
@@ -203,12 +225,18 @@ extern int		nla_memcmp(const struct nlat
 extern int		nla_strcmp(const struct nlattr *nla, const char *str);
 extern struct nlattr *	__nla_reserve(struct sk_buff *skb, int attrtype,
 				      int attrlen);
+extern void *		__nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern struct nlattr *	nla_reserve(struct sk_buff *skb, int attrtype,
 				    int attrlen);
+extern void *		nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern void		__nla_put(struct sk_buff *skb, int attrtype,
 				  int attrlen, const void *data);
+extern void		__nla_put_nohdr(struct sk_buff *skb, int attrlen,
+					const void *data);
 extern int		nla_put(struct sk_buff *skb, int attrtype,
 				int attrlen, const void *data);
+extern int		nla_put_nohdr(struct sk_buff *skb, int attrlen,
+				      const void *data);
 
 /**************************************************************************
  * Netlink Messages
@@ -364,6 +392,17 @@ static inline int nlmsg_validate(struct 
 }
 
 /**
+ * nlmsg_report - need to report back to application?
+ * @nlh: netlink message header
+ *
+ * Returns 1 if a report back to the application is requested.
+ */
+static inline int nlmsg_report(struct nlmsghdr *nlh)
+{
+	return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+}
+
+/**
  * nlmsg_for_each_attr - iterate over a stream of attributes
  * @pos: loop counter, set to current attribute
  * @nlh: netlink message header
@@ -453,12 +492,13 @@ static inline struct nlmsghdr *nlmsg_put
 /**
  * nlmsg_new - Allocate a new netlink message
  * @size: maximum size of message
+ * @flags: the type of memory to allocate.
  *
  * Use NLMSG_GOODSIZE if size isn't know and you need a good default size.
  */
-static inline struct sk_buff *nlmsg_new(int size)
+static inline struct sk_buff *nlmsg_new(int size, gfp_t flags)
 {
-	return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	return alloc_skb(size, flags);
 }
 
 /**
@@ -480,6 +520,32 @@ static inline int nlmsg_end(struct sk_bu
 }
 
 /**
+ * nlmsg_get_pos - return current position in netlink message
+ * @skb: socket buffer the message is stored in
+ *
+ * Returns a pointer to the current tail of the message.
+ */
+static inline void *nlmsg_get_pos(struct sk_buff *skb)
+{
+	return skb->tail;
+}
+
+/**
+ * nlmsg_trim - Trim message to a mark
+ * @skb: socket buffer the message is stored in
+ * @mark: mark to trim to
+ *
+ * Trims the message to the provided mark. Returns -1.
+ */
+static inline int nlmsg_trim(struct sk_buff *skb, void *mark)
+{
+	if (mark)
+		skb_trim(skb, (unsigned char *) mark - skb->data);
+
+	return -1;
+}
+
+/**
  * nlmsg_cancel - Cancel construction of a netlink message
  * @skb: socket buffer the message is stored in
  * @nlh: netlink message header
@@ -489,9 +555,7 @@ static inline int nlmsg_end(struct sk_bu
  */
 static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	skb_trim(skb, (unsigned char *) nlh - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -509,15 +573,16 @@ static inline void nlmsg_free(struct sk_
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
-				  u32 pid, unsigned int group)
+				  u32 pid, unsigned int group, gfp_t flags)
 {
 	int err;
 
 	NETLINK_CB(skb).dst_group = group;
 
-	err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL);
+	err = netlink_broadcast(sk, skb, pid, group, flags);
 	if (err > 0)
 		err = 0;
 
@@ -631,6 +696,18 @@ static inline struct nlattr *nla_next(co
 }
 
 /**
+ * nla_find_nested - find attribute in a set of nested attributes
+ * @nla: attribute containing the nested attributes
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute which matches the specified type.
+ */
+static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
+{
+	return nla_find(nla_data(nla), nla_len(nla), attrtype);
+}
+
+/**
  * nla_parse_nested - parse nested attributes
  * @tb: destination array with maxtype+1 elements
  * @maxtype: maximum attribute type to be expected
@@ -862,10 +939,7 @@ static inline int nla_nest_end(struct sk
  */
 static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	if (start)
-		skb_trim(skb, (unsigned char *) start - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, start);
 }
 
 /**
@@ -880,4 +954,13 @@ static inline int nla_nest_cancel(struct
 	     nla_ok(pos, rem); \
 	     pos = nla_next(pos, &(rem)))
 
+/**
+ * nla_for_each_nested - iterate over nested attributes
+ * @pos: loop counter, set to current attribute
+ * @nla: attribute containing the nested attributes
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_nested(pos, nla, rem) \
+	nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem)
+
 #endif
diff -puN /dev/null include/net/nexthop.h
--- /dev/null
+++ a/include/net/nexthop.h
@@ -0,0 +1,33 @@
+#ifndef __NET_NEXTHOP_H
+#define __NET_NEXTHOP_H
+
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+
+static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
+{
+	return remaining >= sizeof(*rtnh) &&
+	       rtnh->rtnh_len >= sizeof(*rtnh) &&
+	       rtnh->rtnh_len <= remaining;
+}
+
+static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh,
+                                         int *remaining)
+{
+	int totlen = NLA_ALIGN(rtnh->rtnh_len);
+
+	*remaining -= totlen;
+	return (struct rtnexthop *) ((char *) rtnh + totlen);
+}
+
+static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh)
+{
+	return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh)));
+}
+
+static inline int rtnh_attrlen(const struct rtnexthop *rtnh)
+{
+	return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh));
+}
+
+#endif
diff -puN include/net/request_sock.h~git-net include/net/request_sock.h
--- a/include/net/request_sock.h~git-net
+++ a/include/net/request_sock.h
@@ -53,6 +53,7 @@ struct request_sock {
 	unsigned long			expires;
 	struct request_sock_ops		*rsk_ops;
 	struct sock			*sk;
+	u32				secid;
 };
 
 static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
diff -puN include/net/route.h~git-net include/net/route.h
--- a/include/net/route.h~git-net
+++ a/include/net/route.h
@@ -32,6 +32,7 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
+#include <linux/security.h>
 
 #ifndef __KERNEL__
 #warning This file is not supposed to be used outside of kernel.
@@ -166,6 +167,7 @@ static inline int ip_route_connect(struc
 		ip_rt_put(*rp);
 		*rp = NULL;
 	}
+	security_sk_classify_flow(sk, &fl);
 	return ip_route_output_flow(rp, &fl, sk, 0);
 }
 
@@ -182,6 +184,7 @@ static inline int ip_route_newports(stru
 		fl.proto = protocol;
 		ip_rt_put(*rp);
 		*rp = NULL;
+		security_sk_classify_flow(sk, &fl);
 		return ip_route_output_flow(rp, &fl, sk, 0);
 	}
 	return 0;
diff -puN include/net/sock.h~git-net include/net/sock.h
--- a/include/net/sock.h~git-net
+++ a/include/net/sock.h
@@ -969,9 +969,23 @@ static inline void sock_graft(struct soc
 	sk->sk_sleep = &parent->wait;
 	parent->sk = sk;
 	sk->sk_socket = parent;
+	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+	void *sptr = nsk->sk_security;
+#endif
+
+	memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+	nsk->sk_security = sptr;
+	security_sk_clone(osk, nsk);
+#endif
+}
+
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
diff -puN include/net/xfrm.h~git-net include/net/xfrm.h
--- a/include/net/xfrm.h~git-net
+++ a/include/net/xfrm.h
@@ -363,7 +363,7 @@ struct xfrm_mgr
 	char			*id;
 	int			(*notify)(struct xfrm_state *x, struct km_event *c);
 	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
-	struct xfrm_policy	*(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir);
+	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
 };
diff -puN net/Kconfig~git-net net/Kconfig
--- a/net/Kconfig~git-net
+++ a/net/Kconfig
@@ -249,6 +249,11 @@ source "net/ieee80211/Kconfig"
 config WIRELESS_EXT
 	bool
 
+source "net/netlabel/Kconfig"
+
+config FIB_RULES
+	bool
+
 endif   # if NET
 endmenu # Networking
 
diff -puN net/Makefile~git-net net/Makefile
--- a/net/Makefile~git-net
+++ a/net/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_IEEE80211)		+= ieee80211/
 obj-$(CONFIG_TIPC)		+= tipc/
+obj-$(CONFIG_NETLABEL)		+= netlabel/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff -puN net/atm/atm_sysfs.c~git-net net/atm/atm_sysfs.c
--- a/net/atm/atm_sysfs.c~git-net
+++ a/net/atm/atm_sysfs.c
@@ -1,6 +1,5 @@
 /* ATM driver model support. */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
diff -puN net/bridge/br_netlink.c~git-net net/bridge/br_netlink.c
--- a/net/bridge/br_netlink.c~git-net
+++ a/net/bridge/br_netlink.c
@@ -12,6 +12,7 @@
 
 #include <linux/kernel.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include "br_private.h"
 
 /*
@@ -76,26 +77,24 @@ rtattr_failure:
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
 	struct sk_buff *skb;
-	int err = -ENOMEM;
+	int payload = sizeof(struct ifinfomsg) + 128;
+	int err = -ENOBUFS;
 
 	pr_debug("bridge notify event=%d\n", event);
-	skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128),
-			GFP_ATOMIC);
-	if (!skb)
-		goto err_out;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
 
-	err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0);
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+errout:
 	if (err < 0)
-		goto err_kfree;
-
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
-	return;
-
-err_kfree:
-	kfree_skb(skb);
-err_out:
-	netlink_set_err(rtnl, 0, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /*
diff -puN net/bridge/netfilter/ebtables.c~git-net net/bridge/netfilter/ebtables.c
--- a/net/bridge/netfilter/ebtables.c~git-net
+++ a/net/bridge/netfilter/ebtables.c
@@ -37,30 +37,9 @@
 #include <linux/netfilter_ipv4/listhelp.h>
 #include <linux/mutex.h>
 
-#if 0
-/* use this for remote debugging
- * Copyright (C) 1998 by Ori Pomerantz
- * Print the string to the appropriate tty, the one
- * the current task uses
- */
-static void print_string(char *str)
-{
-	struct tty_struct *my_tty;
-
-	/* The tty for the current task */
-	my_tty = current->signal->tty;
-	if (my_tty != NULL) {
-		my_tty->driver->write(my_tty, 0, str, strlen(str));
-		my_tty->driver->write(my_tty, 0, "\015\012", 2);
-	}
-}
-
-#define BUGPRINT(args) print_string(args);
-#else
 #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
                                          "report to author: "format, ## args)
 /* #define BUGPRINT(format, args...) */
-#endif
 #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
                                          ": out of memory: "format, ## args)
 /* #define MEMPRINT(format, args...) */
diff -puN net/core/Makefile~git-net net/core/Makefile
--- a/net/core/Makefile~git-net
+++ a/net/core/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff -puN net/core/datagram.c~git-net net/core/datagram.c
--- a/net/core/datagram.c~git-net
+++ a/net/core/datagram.c
@@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(str
 
 	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(str
 			goto fault;
 		if ((unsigned short)csum_fold(csum))
 			goto csum_error;
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
diff -puN net/core/dev.c~git-net net/core/dev.c
--- a/net/core/dev.c~git-net
+++ a/net/core/dev.c
@@ -636,7 +636,8 @@ struct net_device * dev_get_by_flags(uns
  */
 int dev_valid_name(const char *name)
 {
-	return !(*name == '\0' 
+	return !(*name == '\0'
+		 || strlen(name) >= IFNAMSIZ
 		 || !strcmp(name, ".")
 		 || !strcmp(name, "..")
 		 || strchr(name, '/'));
@@ -1157,12 +1158,12 @@ EXPORT_SYMBOL(netif_device_attach);
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
  */
-int skb_checksum_help(struct sk_buff *skb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
 {
 	unsigned int csum;
 	int ret = 0, offset = skb->h.raw - skb->data;
 
-	if (inward)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
 
 	if (unlikely(skb_shinfo(skb)->gso_size)) {
@@ -1214,7 +1215,7 @@ struct sk_buff *skb_gso_segment(struct s
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1223,7 +1224,7 @@ struct sk_buff *skb_gso_segment(struct s
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -1435,11 +1436,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    (!(dev->features & NETIF_F_GEN_CSUM) &&
 	     (!(dev->features & NETIF_F_IP_CSUM) ||
 	      skb->protocol != htons(ETH_P_IP))))
-	      	if (skb_checksum_help(skb, 0))
+	      	if (skb_checksum_help(skb))
 	      		goto out_kfree_skb;
 
 gso:
@@ -3198,13 +3199,15 @@ struct net_device *alloc_netdev(int size
 	struct net_device *dev;
 	int alloc_size;
 
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
 	/* ensure 32-byte alignment of both the device and private area */
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
-		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
 		return NULL;
 	}
 
diff -puN net/core/dev_mcast.c~git-net net/core/dev_mcast.c
--- a/net/core/dev_mcast.c~git-net
+++ a/net/core/dev_mcast.c
@@ -21,8 +21,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h> 
-#include <linux/module.h> 
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
diff -puN net/core/dst.c~git-net net/core/dst.c
--- a/net/core/dst.c~git-net
+++ a/net/core/dst.c
@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dum
 		dst_gc_timer_inc = DST_GC_INC;
 		dst_gc_timer_expires = DST_GC_MIN;
 	}
-	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
 #if RT_CACHE_DEBUG >= 2
 	printk("dst_total: %d/%d %ld\n",
 	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
 #endif
-	add_timer(&dst_gc_timer);
+	mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 
 out:
 	spin_unlock(&dst_lock);
diff -puN /dev/null net/core/fib_rules.c
--- /dev/null
+++ a/net/core/fib_rules.c
@@ -0,0 +1,427 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+	struct fib_rules_ops *ops;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;
+			rcu_read_unlock();
+			return ops;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)
+		module_put(ops->owner);
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;
+
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;
+
+	list_add_tail_rcu(&ops->list, &rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&rules_mod_lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+
+	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);
+	}
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	int err = 0;
+	struct fib_rules_ops *o;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list) {
+		if (o == ops) {
+			list_del_rcu(&o->list);
+			cleanup_ops(ops);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	spin_unlock(&rules_mod_lock);
+
+	synchronize_rcu();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (rule->ifindex && (rule->ifindex != fl->iif))
+			continue;
+
+		if (!ops->match(rule, fl, flags))
+			continue;
+
+		err = ops->action(rule, fl, flags, arg);
+		if (err != -EAGAIN) {
+			fib_rule_get(rule);
+			arg->rule = rule;
+			goto out;
+		}
+	}
+
+	err = -ENETUNREACH;
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IFNAME]) {
+		struct net_device *dev;
+
+		rule->ifindex = -1;
+		if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME],
+				IFNAMSIZ) >= IFNAMSIZ)
+			goto errout_free;
+
+		dev = __dev_get_by_name(rule->ifname);
+		if (dev)
+			rule->ifindex = dev->ifindex;
+	}
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh_get_table(frh, tb);
+
+	if (!rule->pref && ops->default_pref)
+		rule->pref = ops->default_pref();
+
+	err = ops->configure(rule, skb, nlh, frh, tb);
+	if (err < 0)
+		goto errout_free;
+
+	list_for_each_entry(r, ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, ops->rules_list);
+
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	kfree(rule);
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IFNAME] &&
+		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+		synchronize_rcu();
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
+		fib_rule_put(rule);
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -1;
+
+	frh = nlmsg_data(nlh);
+	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+
+	if (rule->ifname[0])
+		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+	if (ops->fill(rule, skb, nlh, frh) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+	struct fib_rules_ops *ops;
+
+	ops = lookup_rules_ops(family);
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (idx < cb->args[0])
+			goto skip;
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(ops->nlgroup, err);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->ifindex == -1 &&
+		    strcmp(dev->name, rule->ifname) == 0)
+			rule->ifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list)
+		if (rule->ifindex == dev->ifindex)
+			rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+	rcu_read_lock();
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			attach_rules(ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			detach_rules(ops->rules_list, dev);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,
+};
+
+static int __init fib_rules_init(void)
+{
+	return register_netdevice_notifier(&fib_rules_notifier);
+}
+
+subsys_initcall(fib_rules_init);
diff -puN net/core/flow.c~git-net net/core/flow.c
--- a/net/core/flow.c~git-net
+++ a/net/core/flow.c
@@ -32,7 +32,6 @@ struct flow_cache_entry {
 	u8			dir;
 	struct flowi		key;
 	u32			genid;
-	u32			sk_sid;
 	void			*object;
 	atomic_t		*object_ref;
 };
@@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *ke
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *ke
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
-			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -226,7 +223,7 @@ nocache:
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
+		resolver(key, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
diff -puN net/core/neighbour.c~git-net net/core/neighbour.c
--- a/net/core/neighbour.c~git-net
+++ a/net/core/neighbour.c
@@ -30,6 +30,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -1437,48 +1438,62 @@ int neigh_table_clear(struct neigh_table
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *dst_attr;
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err = -EINVAL;
+
+	if (nlmsg_len(nlh) < sizeof(*ndm))
+		goto out;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+	if (dst_attr == NULL)
 		goto out;
 
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		struct neighbour *n;
+		struct neighbour *neigh;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(dst_attr) < tbl->key_len)
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
-		if (!dev)
-			goto out;
+		if (dev == NULL)
+			goto out_dev_put;
 
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			err = neigh_update(n, NULL, NUD_FAILED, 
-					   NEIGH_UPDATE_F_OVERRIDE|
-					   NEIGH_UPDATE_F_ADMIN);
-			neigh_release(n);
+		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+		if (neigh == NULL) {
+			err = -ENOENT;
+			goto out_dev_put;
 		}
+
+		err = neigh_update(neigh, NULL, NUD_FAILED,
+				   NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1488,76 +1503,88 @@ out:
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *tb[NDA_MAX+1];
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		goto out;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	err = -EINVAL;
+	if (tb[NDA_DST] == NULL)
 		goto out;
 
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+			goto out_dev_put;
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		int override = 1;
-		struct neighbour *n;
+		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+		struct neighbour *neigh;
+		void *dst, *lladdr;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(tb[NDA_DST]) < tbl->key_len)
 			goto out_dev_put;
+		dst = nla_data(tb[NDA_DST]);
+		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = -ENOBUFS;
-			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
-				err = 0;
+			err = 0;
+			if (pneigh_lookup(tbl, dst, dev, 1) == NULL)
+				err = -ENOBUFS;
 			goto out_dev_put;
 		}
 
-		err = -EINVAL;
-		if (!dev)
-			goto out;
-		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+		if (dev == NULL)
 			goto out_dev_put;
+
+		neigh = neigh_lookup(tbl, dst, dev);
+		if (neigh == NULL) {
+			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+				err = -ENOENT;
+				goto out_dev_put;
+			}
 	
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			if (nlh->nlmsg_flags & NLM_F_EXCL) {
-				err = -EEXIST;
-				neigh_release(n);
+			neigh = __neigh_lookup_errno(tbl, dst, dev);
+			if (IS_ERR(neigh)) {
+				err = PTR_ERR(neigh);
 				goto out_dev_put;
 			}
-			
-			override = nlh->nlmsg_flags & NLM_F_REPLACE;
-		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
-			err = -ENOENT;
-			goto out_dev_put;
 		} else {
-			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
-			if (IS_ERR(n)) {
-				err = PTR_ERR(n);
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(neigh);
 				goto out_dev_put;
 			}
-		}
 
-		err = neigh_update(n,
-				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
-				   ndm->ndm_state,
-				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
-				   NEIGH_UPDATE_F_ADMIN);
+			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+		}
 
-		neigh_release(n);
+		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1567,56 +1594,59 @@ out:
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-	struct rtattr *nest = NULL;
-	
-	nest = RTA_NEST(skb, NDTA_PARMS);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NDTA_PARMS);
+	if (nest == NULL)
+		return -ENOBUFS;
 
 	if (parms->dev)
-		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
 
-	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
-	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
-	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
-	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
-	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
-	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
-	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
 		      parms->base_reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
-	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
-	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
-	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
-	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
-	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
 
-	return RTA_NEST_END(skb, nest);
+	return nla_nest_end(skb, nest);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, nest);
+nla_put_failure:
+	return nla_nest_cancel(skb, nest);
 }
 
-static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
-			      struct netlink_callback *cb)
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+			      u32 pid, u32 seq, int type, int flags)
 {
 	struct nlmsghdr *nlh;
 	struct ndtmsg *ndtmsg;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
 
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
-	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
-	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
-	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
-	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
 
 	{
 		unsigned long now = jiffies;
@@ -1635,7 +1665,7 @@ static int neightbl_fill_info(struct nei
 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
 		};
 
-		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
 	}
 
 	{
@@ -1660,55 +1690,50 @@ static int neightbl_fill_info(struct nei
 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
 		}
 
-		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
 	}
 
 	BUG_ON(tbl->parms.dev);
 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
+	return nlmsg_end(skb, nlh);
 
-rtattr_failure:
+nla_put_failure:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
- 
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int neightbl_fill_param_info(struct neigh_table *tbl,
+static int neightbl_fill_param_info(struct sk_buff *skb,
+				    struct neigh_table *tbl,
 				    struct neigh_parms *parms,
-				    struct sk_buff *skb,
-				    struct netlink_callback *cb)
+				    u32 pid, u32 seq, int type,
+				    unsigned int flags)
 {
 	struct ndtmsg *ndtmsg;
 	struct nlmsghdr *nlh;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
 
-	if (neightbl_fill_parms(skb, parms) < 0)
-		goto rtattr_failure;
+	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+	    neightbl_fill_parms(skb, parms) < 0)
+		goto errout;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
-
-rtattr_failure:
+	return nlmsg_end(skb, nlh);
+errout:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
-
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
  
 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
@@ -1724,28 +1749,61 @@ static inline struct neigh_parms *lookup
 	return NULL;
 }
 
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+	[NDTA_NAME]		= { .type = NLA_STRING },
+	[NDTA_THRESH1]		= { .type = NLA_U32 },
+	[NDTA_THRESH2]		= { .type = NLA_U32 },
+	[NDTA_THRESH3]		= { .type = NLA_U32 },
+	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
+	[NDTA_PARMS]		= { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
+	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
+	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
+	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
+	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
+	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
+};
+
 int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct neigh_table *tbl;
-	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
-	struct rtattr **tb = arg;
-	int err = -EINVAL;
+	struct ndtmsg *ndtmsg;
+	struct nlattr *tb[NDTA_MAX+1];
+	int err;
 
-	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
-		return -EINVAL;
+	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+			  nl_neightbl_policy);
+	if (err < 0)
+		goto errout;
 
+	if (tb[NDTA_NAME] == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	ndtmsg = nlmsg_data(nlh);
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
-		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
 			break;
 	}
 
 	if (tbl == NULL) {
 		err = -ENOENT;
-		goto errout;
+		goto errout_locked;
 	}
 
 	/* 
@@ -1754,165 +1812,178 @@ int neightbl_set(struct sk_buff *skb, st
 	 */
 	write_lock_bh(&tbl->lock);
 
-	if (tb[NDTA_THRESH1 - 1])
-		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
-
-	if (tb[NDTA_THRESH2 - 1])
-		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
-
-	if (tb[NDTA_THRESH3 - 1])
-		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
-
-	if (tb[NDTA_GC_INTERVAL - 1])
-		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
-
-	if (tb[NDTA_PARMS - 1]) {
-		struct rtattr *tbp[NDTPA_MAX];
+	if (tb[NDTA_PARMS]) {
+		struct nlattr *tbp[NDTPA_MAX+1];
 		struct neigh_parms *p;
-		u32 ifindex = 0;
+		int i, ifindex = 0;
 
-		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
-			goto rtattr_failure;
+		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+				       nl_ntbl_parm_policy);
+		if (err < 0)
+			goto errout_tbl_lock;
 
-		if (tbp[NDTPA_IFINDEX - 1])
-			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+		if (tbp[NDTPA_IFINDEX])
+			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
 		p = lookup_neigh_params(tbl, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
-			goto rtattr_failure;
+			goto errout_tbl_lock;
 		}
-	
-		if (tbp[NDTPA_QUEUE_LEN - 1])
-			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
 
-		if (tbp[NDTPA_PROXY_QLEN - 1])
-			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
-
-		if (tbp[NDTPA_APP_PROBES - 1])
-			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
-
-		if (tbp[NDTPA_UCAST_PROBES - 1])
-			p->ucast_probes =
-			   RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
-
-		if (tbp[NDTPA_MCAST_PROBES - 1])
-			p->mcast_probes =
-			   RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
-
-		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
-			p->base_reachable_time =
-			   RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
-
-		if (tbp[NDTPA_GC_STALETIME - 1])
-			p->gc_staletime =
-			   RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+		for (i = 1; i <= NDTPA_MAX; i++) {
+			if (tbp[i] == NULL)
+				continue;
 
-		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
-			p->delay_probe_time =
-			   RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+			switch (i) {
+			case NDTPA_QUEUE_LEN:
+				p->queue_len = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_PROXY_QLEN:
+				p->proxy_qlen = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_APP_PROBES:
+				p->app_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_UCAST_PROBES:
+				p->ucast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_MCAST_PROBES:
+				p->mcast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_BASE_REACHABLE_TIME:
+				p->base_reachable_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_GC_STALETIME:
+				p->gc_staletime = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_DELAY_PROBE_TIME:
+				p->delay_probe_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_RETRANS_TIME:
+				p->retrans_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_ANYCAST_DELAY:
+				p->anycast_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_PROXY_DELAY:
+				p->proxy_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_LOCKTIME:
+				p->locktime = nla_get_msecs(tbp[i]);
+				break;
+			}
+		}
+	}
 
-		if (tbp[NDTPA_RETRANS_TIME - 1])
-			p->retrans_time =
-			   RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+	if (tb[NDTA_THRESH1])
+		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
 
-		if (tbp[NDTPA_ANYCAST_DELAY - 1])
-			p->anycast_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+	if (tb[NDTA_THRESH2])
+		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
 
-		if (tbp[NDTPA_PROXY_DELAY - 1])
-			p->proxy_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+	if (tb[NDTA_THRESH3])
+		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
 
-		if (tbp[NDTPA_LOCKTIME - 1])
-			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
-	}
+	if (tb[NDTA_GC_INTERVAL])
+		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
 
 	err = 0;
 
-rtattr_failure:
+errout_tbl_lock:
 	write_unlock_bh(&tbl->lock);
-errout:
+errout_locked:
 	read_unlock(&neigh_tbl_lock);
+errout:
 	return err;
 }
 
 int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx, family;
-	int s_idx = cb->args[0];
+	int family, tidx, nidx = 0;
+	int tbl_skip = cb->args[0];
+	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
-	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
 		struct neigh_parms *p;
 
-		if (idx < s_idx || (family && tbl->family != family))
+		if (tidx < tbl_skip || (family && tbl->family != family))
 			continue;
 
-		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+				       NLM_F_MULTI) <= 0)
 			break;
 
-		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
-			if (idx < s_idx)
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+			if (nidx < neigh_skip)
 				continue;
 
-			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+			if (neightbl_fill_param_info(skb, tbl, p,
+						     NETLINK_CB(cb->skb).pid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGHTBL,
+						     NLM_F_MULTI) <= 0)
 				goto out;
 		}
 
+		neigh_skip = 0;
 	}
 out:
 	read_unlock(&neigh_tbl_lock);
-	cb->args[0] = idx;
+	cb->args[0] = tidx;
+	cb->args[1] = nidx;
 
 	return skb->len;
 }
 
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event, unsigned int flags)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+			   u32 pid, u32 seq, int type, unsigned int flags)
 {
 	unsigned long now = jiffies;
-	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
-	int locked = 0;
-	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
-					 sizeof(struct ndmsg), flags);
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndm->ndm_family	 = n->ops->family;
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = neigh->ops->family;
 	ndm->ndm_pad1    = 0;
 	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = n->flags;
-	ndm->ndm_type	 = n->type;
-	ndm->ndm_ifindex = n->dev->ifindex;
-	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
-	read_lock_bh(&n->lock);
-	locked		 = 1;
-	ndm->ndm_state	 = n->nud_state;
-	if (n->nud_state & NUD_VALID)
-		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
-	ci.ndm_used	 = now - n->used;
-	ci.ndm_confirmed = now - n->confirmed;
-	ci.ndm_updated	 = now - n->updated;
-	ci.ndm_refcnt	 = atomic_read(&n->refcnt) - 1;
-	probes = atomic_read(&n->probes);
-	read_unlock_bh(&n->lock);
-	locked		 = 0;
-	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
-	RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
-	nlh->nlmsg_len	 = skb->tail - b;
-	return skb->len;
+	ndm->ndm_flags	 = neigh->flags;
+	ndm->ndm_type	 = neigh->type;
+	ndm->ndm_ifindex = neigh->dev->ifindex;
+
+	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
+
+	read_lock_bh(&neigh->lock);
+	ndm->ndm_state	 = neigh->nud_state;
+	if ((neigh->nud_state & NUD_VALID) &&
+	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
+		read_unlock_bh(&neigh->lock);
+		goto nla_put_failure;
+	}
+
+	ci.ndm_used	 = now - neigh->used;
+	ci.ndm_confirmed = now - neigh->confirmed;
+	ci.ndm_updated	 = now - neigh->updated;
+	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
+	read_unlock_bh(&neigh->lock);
+
+	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
 
-nlmsg_failure:
-rtattr_failure:
-	if (locked)
-		read_unlock_bh(&n->lock);
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 
@@ -1956,7 +2027,7 @@ int neigh_dump_info(struct sk_buff *skb,
 	int t, family, s_t;
 
 	read_lock(&neigh_tbl_lock);
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -2335,41 +2406,35 @@ static struct file_operations neigh_stat
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_ARPD
-void neigh_app_ns(struct neighbour *n)
+static void __neigh_notify(struct neighbour *n, int type, int flags)
 {
-	struct nlmsghdr  *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
 
-	if (!skb)
-		return;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
+	err = neigh_fill_info(skb, n, 0, 0, type, flags);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	nlh->nlmsg_flags	   = NLM_F_REQUEST;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
 }
 
-static void neigh_app_notify(struct neighbour *n)
+void neigh_app_ns(struct neighbour *n)
 {
-	struct nlmsghdr *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
-
-	if (!skb)
-		return;
+	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+}
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
-		kfree_skb(skb);
-		return;
-	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+static void neigh_app_notify(struct neighbour *n)
+{
+	__neigh_notify(n, RTM_NEWNEIGH, 0);
 }
 
 #endif /* CONFIG_ARPD */
@@ -2383,7 +2448,7 @@ static struct neigh_sysctl_table {
 	ctl_table		neigh_neigh_dir[2];
 	ctl_table		neigh_proto_dir[2];
 	ctl_table		neigh_root_dir[2];
-} neigh_sysctl_template = {
+} neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
 		{
 			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
diff -puN net/core/netpoll.c~git-net net/core/netpoll.c
--- a/net/core/netpoll.c~git-net
+++ a/net/core/netpoll.c
@@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !(u16)csum_fold(csum_add(psum, skb->csum)))
 		return 0;
 
diff -puN net/core/pktgen.c~git-net net/core/pktgen.c
--- a/net/core/pktgen.c~git-net
+++ a/net/core/pktgen.c
@@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(
 	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
+	skb->nh.iph = iph;
+	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2460,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
+	skb->nh.ipv6h = iph;
+	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
diff -puN net/core/rtnetlink.c~git-net net/core/rtnetlink.c
--- a/net/core/rtnetlink.c~git-net
+++ a/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/security.h>
 #include <linux/mutex.h>
+#include <linux/if_addr.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -49,6 +50,7 @@
 #include <net/udp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
+#include <net/fib_rules.h>
 #include <net/netlink.h>
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
 #include <linux/wireless.h>
@@ -56,6 +58,7 @@
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
 static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
 
 void rtnl_lock(void)
 {
@@ -93,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], in
 	return 0;
 }
 
-struct sock *rtnl;
-
 struct rtnetlink_link * rtnetlink_links[NPROTO];
 
 static const int rtm_min[RTM_NR_FAMILIES] =
@@ -102,8 +103,7 @@ static const int rtm_min[RTM_NR_FAMILIES
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
 	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
 	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
-	[RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
-	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
 	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -111,7 +111,6 @@ static const int rtm_min[RTM_NR_FAMILIES
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -119,13 +118,11 @@ static const int rta_max[RTM_NR_FAMILIES
 	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
 	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
 	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
-	[RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
-	[RTM_FAM(RTM_NEWRULE)]      = RTA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
 	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -168,6 +165,27 @@ int rtnetlink_send(struct sk_buff *skb, 
 	return err;
 }
 
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+	return nlmsg_unicast(rtnl, skb, pid);
+}
+
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		struct nlmsghdr *nlh, gfp_t flags)
+{
+	int report = 0;
+
+	if (nlh)
+		report = nlmsg_report(nlh);
+
+	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+	netlink_set_err(rtnl, 0, group, error);
+}
+
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
 	struct rtattr *mx = (struct rtattr*)skb->tail;
@@ -216,41 +234,73 @@ static void set_operstate(struct net_dev
 	}
 }
 
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change, 
-				 unsigned int flags)
-{
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev_get_flags(dev);
-	r->ifi_change = change;
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+				 struct net_device_stats *b)
+{
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+};
 
-	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+			    void *iwbuf, int iwbuflen, int type, u32 pid,
+			    u32 seq, u32 change, unsigned int flags)
+{
+	struct ifinfomsg *ifm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = change;
+
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+	NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+	NLA_PUT_U8(skb, IFLA_OPERSTATE,
+		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
 
-	if (1) {
-		u32 txqlen = dev->tx_queue_len;
-		RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
-	}
+	if (dev->ifindex != dev->iflink)
+		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
 
-	if (1) {
-		u32 weight = dev->weight;
-		RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
-	}
+	if (dev->master)
+		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-	if (1) {
-		u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN;
-		u8 link_mode = dev->link_mode;
-		RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
-		RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
-	}
+	if (dev->qdisc_sleeping)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
 
 	if (1) {
 		struct rtnl_link_ifmap map = {
@@ -261,58 +311,38 @@ static int rtnetlink_fill_ifinfo(struct 
 			.dma         = dev->dma,
 			.port        = dev->if_port,
 		};
-		RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+		NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
 	}
 
 	if (dev->addr_len) {
-		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
-	}
-
-	if (1) {
-		u32 mtu = dev->mtu;
-		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
-	}
-
-	if (dev->ifindex != dev->iflink) {
-		u32 iflink = dev->iflink;
-		RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
-	}
-
-	if (dev->qdisc_sleeping)
-		RTA_PUT(skb, IFLA_QDISC,
-			strlen(dev->qdisc_sleeping->ops->id) + 1,
-			dev->qdisc_sleeping->ops->id);
-	
-	if (dev->master) {
-		u32 master = dev->master->ifindex;
-		RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
 	}
 
 	if (dev->get_stats) {
-		unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+		struct net_device_stats *stats = dev->get_stats(dev);
 		if (stats) {
-			struct rtattr  *a;
-			__u32	       *s;
-			int		i;
-			int		n = sizeof(struct rtnl_link_stats)/4;
-
-			a = __RTA_PUT(skb, IFLA_STATS, n*4);
-			s = RTA_DATA(a);
-			for (i=0; i<n; i++)
-				s[i] = stats[i];
+			struct nlattr *attr;
+
+			attr = nla_reserve(skb, IFLA_STATS,
+					   sizeof(struct rtnl_link_stats));
+			if (attr == NULL)
+				goto nla_put_failure;
+
+			copy_rtnl_link_stats(nla_data(attr), stats);
 		}
 	}
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (iwbuf)
+		NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->args[0];
@@ -322,10 +352,9 @@ static int rtnetlink_dump_ifinfo(struct 
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
-					  NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, 0,
-					  NLM_F_MULTI) <= 0)
+		if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+				     NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -334,52 +363,69 @@ static int rtnetlink_dump_ifinfo(struct 
 	return skb->len;
 }
 
-static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+	[IFLA_IFNAME]		= { .type = NLA_STRING },
+	[IFLA_MAP]		= { .minlen = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_MTU]		= { .type = NLA_U32 },
+	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+	[IFLA_WEIGHT]		= { .type = NLA_U32 },
+	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
+	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
-	struct rtattr    **ida = arg;
+	struct ifinfomsg *ifm;
 	struct net_device *dev;
-	int err, send_addr_notify = 0;
+	int err, send_addr_notify = 0, modified = 0;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
 
+	if (tb[IFLA_IFNAME] &&
+	    nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ) >= IFNAMSIZ)
+		return -EINVAL;
+
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index >= 0)
 		dev = dev_get_by_index(ifm->ifi_index);
-	else if (ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
-
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			return -EINVAL;
+	else if (tb[IFLA_IFNAME])
 		dev = dev_get_by_name(ifname);
-	} else
-		return -EINVAL;
+	else
+		goto errout;
 
-	if (!dev)
-		return -ENODEV;
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
 
-	err = -EINVAL;
+	if (tb[IFLA_ADDRESS] &&
+	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+		goto errout_dev;
 
-	if (ifm->ifi_flags)
-		dev_change_flags(dev, ifm->ifi_flags);
+	if (tb[IFLA_BROADCAST] &&
+	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+		goto errout_dev;
 
-	if (ida[IFLA_MAP - 1]) {
+	if (tb[IFLA_MAP]) {
 		struct rtnl_link_ifmap *u_map;
 		struct ifmap k_map;
 
 		if (!dev->set_config) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
 
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		
-		if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
-			goto out;
-
-		u_map = RTA_DATA(ida[IFLA_MAP - 1]);
 
+		u_map = nla_data(tb[IFLA_MAP]);
 		k_map.mem_start = (unsigned long) u_map->mem_start;
 		k_map.mem_end = (unsigned long) u_map->mem_end;
 		k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -388,187 +434,175 @@ static int do_setlink(struct sk_buff *sk
 		k_map.port = (unsigned char) u_map->port;
 
 		err = dev->set_config(dev, &k_map);
+		if (err < 0)
+			goto errout_dev;
 
-		if (err)
-			goto out;
+		modified = 1;
 	}
 
-	if (ida[IFLA_ADDRESS - 1]) {
+	if (tb[IFLA_ADDRESS]) {
+		struct sockaddr *sa;
+		int len;
+
 		if (!dev->set_mac_address) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
+
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
 
-		err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+		len = sizeof(sa_family_t) + dev->addr_len;
+		sa = kmalloc(len, GFP_KERNEL);
+		if (!sa) {
+			err = -ENOMEM;
+			goto errout_dev;
+		}
+		sa->sa_family = dev->type;
+		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+		       dev->addr_len);
+		err = dev->set_mac_address(dev, sa);
+		kfree(sa);
 		if (err)
-			goto out;
+			goto errout_dev;
 		send_addr_notify = 1;
+		modified = 1;
 	}
 
-	if (ida[IFLA_BROADCAST - 1]) {
-		if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
-		memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
-		       dev->addr_len);
-		send_addr_notify = 1;
+	if (tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
+	}
+
+	/*
+	 * Interface selected by interface index but interface
+	 * name provided implies that a name change has been
+	 * requested.
+	 */
+	if (ifm->ifi_index >= 0 && ifname[0]) {
+		err = dev_change_name(dev, ifname);
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
 	}
 
-	if (ida[IFLA_MTU - 1]) {
-		if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-		err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
-
-		if (err)
-			goto out;
-
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
+		/* Call Wireless Extensions.
+		 * Various stuff checked in there... */
+		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]));
+		if (err < 0)
+			goto errout_dev;
 	}
+#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-	if (ida[IFLA_TXQLEN - 1]) {
-		if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-
-		dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+	if (tb[IFLA_BROADCAST]) {
+		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+		send_addr_notify = 1;
 	}
 
-	if (ida[IFLA_WEIGHT - 1]) {
-		if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
 
-		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
-	}
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
 
-	if (ida[IFLA_OPERSTATE - 1]) {
-		if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
-		set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
-	}
+	if (tb[IFLA_WEIGHT])
+		dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
 
-	if (ida[IFLA_LINKMODE - 1]) {
-		if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 
+	if (tb[IFLA_LINKMODE]) {
 		write_lock_bh(&dev_base_lock);
-		dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
-
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			goto out;
-		err = dev_change_name(dev, ifname);
-		if (err)
-			goto out;
-	}
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
-
-		/* Call Wireless Extensions.
-		 * Various stuff checked in there... */
-		err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len);
-		if (err)
-			goto out;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	err = 0;
 
-out:
+errout_dev:
+	if (err < 0 && modified && net_ratelimit())
+		printk(KERN_WARNING "A link change request failed with "
+		       "some changes comitted already. Interface %s may "
+		       "have been left with an inconsistent configuration, "
+		       "please check.\n", dev->name);
+
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 
 	dev_put(dev);
+errout:
 	return err;
 }
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(in_nlh);
-	struct rtattr    **ida = arg;
-	struct net_device *dev;
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	unsigned char	 *b;
-	char *iw_buf = NULL;
+	struct ifinfomsg *ifm;
+	struct nlattr *tb[IFLA_MAX+1];
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	char *iw_buf = NULL, *iw = NULL;
 	int iw_buf_len = 0;
+	int err, payload;
 
-	if (ifm->ifi_index >= 0)
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0) {
 		dev = dev_get_by_index(ifm->ifi_index);
-	else
+		if (dev == NULL)
+			return -ENODEV;
+	} else
 		return -EINVAL;
-	if (!dev)
-		return -ENODEV;
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
 
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
 		/* Call Wireless Extensions. We need to know the size before
 		 * we can alloc. Various stuff checked in there... */
-		err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len);
-		if (err)
-			goto out;
+		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]),
+					     &iw_buf, &iw_buf_len);
+		if (err < 0)
+			goto errout;
+
+		iw += IW_EV_POINT_OFF;
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-	/* Create a skb big enough to include all the data.
-	 * Some requests are way bigger than 4k... Jean II */
-	skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)),
-			GFP_KERNEL);
-	if (!skb)
-		goto out;
-	b = skb->tail;
-
-	/* Put in the message the usual good stuff */
-	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq,
-			RTM_NEWLINK, sizeof(*r));
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev->flags;
-	r->ifi_change = 0;
-
-	/* Put the wireless payload if it exist */
-	if(iw_buf != NULL)
-		RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len,
-			iw_buf + IW_EV_POINT_OFF);
-
-	nlh->nlmsg_len = skb->tail - b;
-
-	/* Needed ? */
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
-	if(iw_buf != NULL)
-		kfree(iw_buf);
+	payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
+			      nla_total_size(iw_buf_len));
+	nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (nskb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+	if (err <= 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_unicast(skb, NETLINK_CB(skb).pid);
+errout:
+	kfree(iw_buf);
 	dev_put(dev);
-	return err;
 
-rtattr_failure:
-nlmsg_failure:
-	kfree_skb(skb);
-	goto out;
+	return err;
 }
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->family;
@@ -595,20 +629,22 @@ static int rtnetlink_dump_all(struct sk_
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
-			       sizeof(struct rtnl_link_ifmap) +
-			       sizeof(struct rtnl_link_stats) + 128);
-
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+	err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
@@ -733,18 +769,19 @@ static void rtnetlink_rcv(struct sock *s
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	[RTM_GETLINK     - RTM_BASE] = {
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-					 .doit   = do_getlink,
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-					 .dumpit = rtnetlink_dump_ifinfo },
-	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink		 },
-	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
-	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
+					 .dumpit = rtnl_dump_ifinfo	 },
+	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
+	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
-	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+#ifdef CONFIG_FIB_RULES
+	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
+	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
+#endif
+	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
 	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
 };
@@ -804,7 +841,9 @@ EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
 EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);
diff -puN net/core/skbuff.c~git-net net/core/skbuff.c
--- a/net/core/skbuff.c~git-net
+++ a/net/core/skbuff.c
@@ -268,8 +268,10 @@ struct sk_buff *__netdev_alloc_skb(struc
 	struct sk_buff *skb;
 
 	skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
-	if (likely(skb))
+	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
+		skb->dev = dev;
+	}
 	return skb;
 }
 
@@ -1395,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct 
 	unsigned int csum;
 	long csstart;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		csstart = skb->h.raw - skb->data;
 	else
 		csstart = skb_headlen(skb);
@@ -1409,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct 
 		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
 					      skb->len - csstart, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		long csstuff = csstart + skb->csum;
 
 		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
@@ -1896,10 +1898,10 @@ int skb_append_datato_frags(struct sock 
  *	@len: length of data pulled
  *
  *	This function performs an skb_pull on the packet and updates
- *	update the CHECKSUM_HW checksum.  It should be used on receive
- *	path processing instead of skb_pull unless you know that the
- *	checksum difference is zero (e.g., a valid IP header) or you
- *	are setting ip_summed to CHECKSUM_NONE.
+ *	update the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_pull unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
  */
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
@@ -1992,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_bu
 		frag = skb_shinfo(nskb)->frags;
 		k = 0;
 
-		nskb->ip_summed = CHECKSUM_HW;
+		nskb->ip_summed = CHECKSUM_PARTIAL;
 		nskb->csum = skb->csum;
 		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
 
diff -puN net/core/sock.c~git-net net/core/sock.c
--- a/net/core/sock.c~git-net
+++ a/net/core/sock.c
@@ -187,13 +187,13 @@ static struct lock_class_key af_callback
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 /* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 /* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -911,7 +911,7 @@ struct sock *sk_clone(const struct sock 
 	if (newsk != NULL) {
 		struct sk_filter *filter;
 
-		memcpy(newsk, sk, sk->sk_prot->obj_size);
+		sock_copy(newsk, sk);
 
 		/* SANITY */
 		sk_node_init(&newsk->sk_node);
diff -puN net/core/wireless.c~git-net net/core/wireless.c
--- a/net/core/wireless.c~git-net
+++ a/net/core/wireless.c
@@ -72,7 +72,6 @@
 
 /***************************** INCLUDES *****************************/
 
-#include <linux/config.h>		/* Not needed ??? */
 #include <linux/module.h>
 #include <linux/types.h>		/* off_t */
 #include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
@@ -86,6 +85,7 @@
 
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
@@ -1850,7 +1850,7 @@ static void wireless_nlevent_process(uns
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+		 rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 }
 
 static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
diff -puN net/dccp/ipv4.c~git-net net/dccp/ipv4.c
--- a/net/dccp/ipv4.c~git-net
+++ a/net/dccp/ipv4.c
@@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
@@ -605,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(stru
 	if (req != NULL)
 		return dccp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&dccp_hashinfo,
-					iph->saddr, dh->dccph_sport,
-					iph->daddr, ntohs(dh->dccph_dport),
-					inet_iif(skb));
+	nsk = inet_lookup_established(&dccp_hashinfo,
+				      iph->saddr, dh->dccph_sport,
+				      iph->daddr, dh->dccph_dport,
+				      inet_iif(skb));
 	if (nsk != NULL) {
 		if (nsk->sk_state != DCCP_TIME_WAIT) {
 			bh_lock_sock(nsk);
@@ -678,6 +681,7 @@ static struct dst_entry* dccp_v4_route_s
 			   	     }
 			  };
 
+	security_skb_classify_flow(skb, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -921,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *s
 	 * 	Look up flow ID in table and get corresponding socket */
 	sk = __inet_lookup(&dccp_hashinfo,
 			   skb->nh.iph->saddr, dh->dccph_sport,
-			   skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+			   skb->nh.iph->daddr, dh->dccph_dport,
 			   inet_iif(skb));
 
 	/* 
diff -puN net/dccp/ipv6.c~git-net net/dccp/ipv6.c
--- a/net/dccp/ipv6.c~git-net
+++ a/net/dccp/ipv6.c
@@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->sport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt != NULL && np->opt->srcrt != NULL) {
 		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_sk_classify_flow(sk, &fl);
 
 			err = ip6_dst_lookup(sk, &dst, &fl);
 			if (err) {
@@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct 
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struc
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_skb_classify_flow(rxskb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
@@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struc
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_req_classify_flow(req, &fl);
 
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
 		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -704,6 +709,9 @@ static int dccp_v6_conn_request(struct s
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq6 = inet6_rsk(req);
 	ireq = inet_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
@@ -842,6 +850,7 @@ static struct sock *dccp_v6_request_recv
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
diff -puN net/decnet/Kconfig~git-net net/decnet/Kconfig
--- a/net/decnet/Kconfig~git-net
+++ a/net/decnet/Kconfig
@@ -27,6 +27,7 @@ config DECNET
 config DECNET_ROUTER
 	bool "DECnet: router support (EXPERIMENTAL)"
 	depends on DECNET && EXPERIMENTAL
+	select FIB_RULES
 	---help---
 	  Add support for turning your DECnet Endnode into a level 1 or 2
 	  router.  This is an experimental, but functional option.  If you
diff -puN net/decnet/af_decnet.c~git-net net/decnet/af_decnet.c
--- a/net/decnet/af_decnet.c~git-net
+++ a/net/decnet/af_decnet.c
@@ -130,6 +130,7 @@ Version 0.0.6    2.1.110   07-aug-98   E
 #include <linux/poll.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_nsp.h>
 #include <net/dn_dev.h>
diff -puN net/decnet/dn_dev.c~git-net net/decnet/dn_dev.c
--- a/net/decnet/dn_dev.c~git-net
+++ a/net/decnet/dn_dev.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/timer.h>
 #include <linux/string.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
@@ -45,6 +46,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_route.h>
@@ -744,20 +746,23 @@ rtattr_failure:
 static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int payload = sizeof(struct ifaddrmsg) + 128;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
-		return;
-	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1417,8 +1422,6 @@ static struct rtnetlink_link dnet_rtnetl
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
 				      .dumpit	= dn_fib_dump,		},
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_delrule,	},
 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
 #else
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
diff -puN net/decnet/dn_fib.c~git-net net/decnet/dn_fib.c
--- a/net/decnet/dn_fib.c~git-net
+++ a/net/decnet/dn_fib.c
@@ -34,6 +34,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -54,11 +55,9 @@
 
 #define endfor_nexthops(fi) }
 
-extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
 static DEFINE_SPINLOCK(dn_fib_multipath_lock);
 static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_RWLOCK(dn_fib_info_lock);
+static DEFINE_SPINLOCK(dn_fib_info_lock);
 
 static struct
 {
@@ -79,6 +78,9 @@ static struct
 	[RTN_XRESOLVE] =    { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
 };
 
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+static int dn_fib_sync_up(struct net_device *dev);
+
 void dn_fib_free_info(struct dn_fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
@@ -96,7 +98,7 @@ void dn_fib_free_info(struct dn_fib_info
 
 void dn_fib_release_info(struct dn_fib_info *fi)
 {
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		if (fi->fib_next)
 			fi->fib_next->fib_prev = fi->fib_prev;
@@ -107,7 +109,7 @@ void dn_fib_release_info(struct dn_fib_i
 		fi->fib_dead = 1;
 		dn_fib_info_put(fi);
 	}
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 }
 
 static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
@@ -378,13 +380,13 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	fi->fib_next = dn_fib_info_list;
 	fi->fib_prev = NULL;
 	if (dn_fib_info_list)
 		dn_fib_info_list->fib_prev = fi;
 	dn_fib_info_list = fi;
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 	return fi;
 
 err_inval:
@@ -490,7 +492,8 @@ static int dn_fib_check_attr(struct rtms
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
 				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+			    i != RTA_TABLE)
 				rta[i-1] = (struct rtattr *)RTA_DATA(attr);
 		}
 	}
@@ -507,7 +510,7 @@ int dn_fib_rtm_delroute(struct sk_buff *
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 0);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
 	if (tb)
 		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
@@ -523,46 +526,13 @@ int dn_fib_rtm_newroute(struct sk_buff *
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 1);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
 	if (tb) 
 		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
 	return -ENOBUFS;
 }
 
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	int t;
-	int s_t;
-	struct dn_fib_table *tb;
-
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
-			return dn_cache_dump(skb, cb);
-
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_MIN_TABLE;
-
-	for(t = s_t; t <= RT_TABLE_MAX; t++) {
-		if (t < s_t)
-			continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0,
-			       sizeof(cb->args) - sizeof(cb->args[0]));
-		tb = dn_fib_get_table(t, 0);
-		if (tb == NULL)
-			continue;
-		if (tb->dump(tb, skb, cb) < 0)
-			break;
-	}
-
-	cb->args[0] = t;
-
-	return skb->len;
-}
-
 static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
 {
 	struct dn_fib_table *tb;
@@ -682,7 +652,7 @@ static int dn_fib_dnaddr_event(struct no
 	return NOTIFY_DONE;
 }
 
-int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 {
         int ret = 0;
         int scope = RT_SCOPE_NOWHERE;
@@ -726,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struc
 }
 
 
-int dn_fib_sync_up(struct net_device *dev)
+static int dn_fib_sync_up(struct net_device *dev)
 {
         int ret = 0;
 
@@ -760,22 +730,6 @@ int dn_fib_sync_up(struct net_device *de
         return ret;
 }
 
-void dn_fib_flush(void)
-{
-        int flushed = 0;
-        struct dn_fib_table *tb;
-        int id;
-
-        for(id = RT_TABLE_MAX; id > 0; id--) {
-                if ((tb = dn_fib_get_table(id, 0)) == NULL)
-                        continue;
-                flushed += tb->flush(tb);
-        }
-
-        if (flushed)
-                dn_rt_cache_flush(-1);
-}
-
 static struct notifier_block dn_fib_dnaddr_notifier = {
 	.notifier_call = dn_fib_dnaddr_event,
 };
diff -puN net/decnet/dn_route.c~git-net net/decnet/dn_route.c
--- a/net/decnet/dn_route.c~git-net
+++ a/net/decnet/dn_route.c
@@ -80,6 +80,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_nsp.h>
@@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk
 		dev_hold(out_dev);
 
 		if (res.r)
-			src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+			src_map = fl.fld_src; /* no NAT support for now */
 
 		gateway = DN_FIB_RES_GW(res);
 		if (res.type == RTN_NAT) {
@@ -1485,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buf
 	r->rtm_src_len = 0;
 	r->rtm_tos = 0;
 	r->rtm_table = RT_TABLE_MAIN;
+	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type = rt->rt_type;
 	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	r->rtm_scope = RT_SCOPE_UNIVERSE;
@@ -1609,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-
-	return err;
+	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
diff -puN net/decnet/dn_rules.c~git-net net/decnet/dn_rules.c
--- a/net/decnet/dn_rules.c~git-net
+++ a/net/decnet/dn_rules.c
@@ -11,259 +11,199 @@
  *
  *
  * Changes:
+ *              Steve Whitehouse <steve@chygwyn.com>
+ *              Updated for Thomas Graf's generic rules
  *
  */
-#include <linux/string.h>
 #include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
 #include <linux/netdevice.h>
-#include <linux/timer.h>
 #include <linux/spinlock.h>
-#include <linux/in_route.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
 
+static struct fib_rules_ops dn_fib_rules_ops;
+
 struct dn_fib_rule
 {
-	struct hlist_node	r_hlist;
-	atomic_t		r_clntref;
-	u32			r_preference;
-	unsigned char		r_table;
-	unsigned char		r_action;
-	unsigned char		r_dst_len;
-	unsigned char		r_src_len;
-	__le16			r_src;
-	__le16			r_srcmask;
-	__le16			r_dst;
-	__le16			r_dstmask;
-	__le16			r_srcmap;
-	u8			r_flags;
+	struct fib_rule		common;
+	unsigned char		dst_len;
+	unsigned char		src_len;
+	__le16			src;
+	__le16			srcmask;
+	__le16			dst;
+	__le16			dstmask;
+	__le16			srcmap;
+	u8			flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	u32			r_fwmark;
+	u32			fwmark;
 #endif
-	int			r_ifindex;
-	char			r_ifname[IFNAMSIZ];
-	int			r_dead;
-	struct rcu_head		rcu;
 };
 
 static struct dn_fib_rule default_rule = {
-	.r_clntref =		ATOMIC_INIT(2),
-	.r_preference =		0x7fff,
-	.r_table =		RT_TABLE_MAIN,
-	.r_action =		RTN_UNICAST
+	.common = {
+		.refcnt =		ATOMIC_INIT(2),
+		.pref =			0x7fff,
+		.table =		RT_TABLE_MAIN,
+		.action =		FR_ACT_TO_TBL,
+	},
 };
 
-static struct hlist_head dn_fib_rules;
+static LIST_HEAD(dn_fib_rules);
+
 
-int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
-			rtm->rtm_src_len == r->r_src_len &&
-			rtm->rtm_dst_len == r->r_dst_len &&
-			(!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-			(!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-			(!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-			(!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-			(!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-			(!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-
-			err = -EPERM;
-			if (r == &default_rule)
-				break;
-
-			hlist_del_rcu(&r->r_hlist);
-			r->r_dead = 1;
-			dn_fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
+
+	err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
 	return err;
 }
 
-static inline void dn_fib_rule_put_rcu(struct rcu_head *head)
+static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+			      int flags, struct fib_lookup_arg *arg)
 {
-	struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu);
-	kfree(r);
-}
+	int err = -EAGAIN;
+	struct dn_fib_table *tbl;
 
-void dn_fib_rule_put(struct dn_fib_rule *r)
-{
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, dn_fib_rule_put_rcu);
-		else
-			printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
-	}
+	switch(rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
+
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
+
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
+	}
+
+	tbl = dn_fib_get_table(rule->table, 0);
+	if (tbl == NULL)
+		goto errout;
+
+	err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
 }
 
+static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U16 },
+	[FRA_DST]	= { .type = NLA_U16 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_TABLE]     = { .type = NLA_U32 },
+};
 
-int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	if (rtm->rtm_type == RTN_NAT)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct dn_fib_table *tb;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((tb = dn_fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = tb->n;
-		}
-	}
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+	u16 daddr = fl->fld_dst;
+	u16 saddr = fl->fld_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+	if (r->fwmark && (r->fwmark != fl->fld_fwmark))
+		return 0;
 #endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = dev_get_by_name(new_r->r_ifname);
-		if (dev) {
-			new_r->r_ifindex = dev->ifindex;
-			dev_put(dev);
-		}
-	}
 
-	r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist);
-	if (!new_r->r_preference) {
-		if (r && r->r_hlist.next != NULL) {
-			r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
+	return 1;
+}
+
+static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+				 struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+		goto  errout;
+
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct dn_fib_table *table;
+
+			table = dn_fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
+
+			rule->table = table->n;
 		}
 	}
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
+	if (tb[FRA_SRC])
+		r->src = nla_get_u16(tb[FRA_SRC]);
 
-	if (last)
-		hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist);
-	else
-		hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist);
-	return 0;
-}
+	if (tb[FRA_DST])
+		r->dst = nla_get_u16(tb[FRA_DST]);
+
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (tb[FRA_FWMARK])
+		r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+#endif
 
+	r->src_len = frh->src_len;
+	r->srcmask = dnet_make_mask(r->src_len);
+	r->dst_len = frh->dst_len;
+	r->dstmask = dnet_make_mask(r->dst_len);
+	err = 0;
+errout:
+	return err;
+}
 
-int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
 {
-	struct dn_fib_rule *r, *policy;
-	struct dn_fib_table *tb;
-	__le16 saddr = flp->fld_src;
-	__le16 daddr = flp->fld_dst;
-	struct hlist_node *node;
-	int err;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len && (r->src_len != frh->src_len))
+		return 0;
 
-	rcu_read_lock();
+	if (frh->dst_len && (r->dst_len != frh->dst_len))
+		return 0;
 
-	hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
+	if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
 #endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
 
-		switch(r->r_action) {
-			case RTN_UNICAST:
-			case RTN_NAT:
-				policy = r;
-				break;
-			case RTN_UNREACHABLE:
-				rcu_read_unlock();
-				return -ENETUNREACH;
-			default:
-			case RTN_BLACKHOLE:
-				rcu_read_unlock();
-				return -EINVAL;
-			case RTN_PROHIBIT:
-				rcu_read_unlock();
-				return -EACCES;
-		}
+	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
+		return 0;
 
-		if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
-			continue;
-		err = tb->lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
+	if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
+		return 0;
 
-	rcu_read_unlock();
-	return -ESRCH;
+	return 1;
 }
 
 unsigned dnet_addr_type(__le16 addr)
@@ -271,7 +211,7 @@ unsigned dnet_addr_type(__le16 addr)
 	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
-	struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
 
 	res.r = NULL;
 
@@ -284,142 +224,77 @@ unsigned dnet_addr_type(__le16 addr)
 	return ret;
 }
 
-__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags)
+static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
 {
-	struct dn_fib_rule *r = res->r;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	if (r->r_action == RTN_NAT) {
-		int addrtype = dnet_addr_type(r->r_srcmap);
+	frh->family = AF_DECnet;
+	frh->dst_len = r->dst_len;
+	frh->src_len = r->src_len;
+	frh->tos = 0;
 
-		if (addrtype == RTN_NAT) {
-			saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
-			*flags |= RTCF_SNAT;
-		} else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
-			saddr = r->r_srcmap;
-			*flags |= RTCF_MASQ;
-		}
-	}
-	return saddr;
-}
-
-static void dn_fib_rules_detach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-	}
-}
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+#endif
+	if (r->dst_len)
+		NLA_PUT_U16(skb, FRA_DST, r->dst);
+	if (r->src_len)
+		NLA_PUT_U16(skb, FRA_SRC, r->src);
 
-static void dn_fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
+	return 0;
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
+nla_put_failure:
+	return -ENOBUFS;
 }
 
-static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static u32 dn_fib_rule_default_pref(void)
 {
-	struct net_device *dev = ptr;
+	struct list_head *pos;
+	struct fib_rule *rule;
 
-	switch(event) {
-		case NETDEV_UNREGISTER:
-			dn_fib_rules_detach(dev);
-			dn_fib_sync_down(0, dev, 1);
-		case NETDEV_REGISTER:
-			dn_fib_rules_attach(dev);
-			dn_fib_sync_up(dev);
+	if (!list_empty(&dn_fib_rules)) {
+		pos = dn_fib_rules.next;
+		if (pos->next != &dn_fib_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
 
-	return NOTIFY_DONE;
-}
-
-
-static struct notifier_block dn_fib_rules_notifier = {
-	.notifier_call =	dn_fib_rules_event,
-};
-
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
-			    struct netlink_callback *cb, unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_DECnet;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return 0;
 }
 
 int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
-
-	return skb->len;
+	return fib_rules_dump(skb, cb, AF_DECnet);
 }
 
+static struct fib_rules_ops dn_fib_rules_ops = {
+	.family		= AF_DECnet,
+	.rule_size	= sizeof(struct dn_fib_rule),
+	.action		= dn_fib_rule_action,
+	.match		= dn_fib_rule_match,
+	.configure	= dn_fib_rule_configure,
+	.compare	= dn_fib_rule_compare,
+	.fill		= dn_fib_rule_fill,
+	.default_pref	= dn_fib_rule_default_pref,
+	.nlgroup	= RTNLGRP_DECnet_RULE,
+	.policy		= dn_fib_rule_policy,
+	.rules_list	= &dn_fib_rules,
+	.owner		= THIS_MODULE,
+};
+
 void __init dn_fib_rules_init(void)
 {
-	INIT_HLIST_HEAD(&dn_fib_rules);
-	hlist_add_head(&default_rule.r_hlist, &dn_fib_rules);
-	register_netdevice_notifier(&dn_fib_rules_notifier);
+	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+	fib_rules_register(&dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	unregister_netdevice_notifier(&dn_fib_rules_notifier);
+	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
 
diff -puN net/decnet/dn_table.c~git-net net/decnet/dn_table.c
--- a/net/decnet/dn_table.c~git-net
+++ a/net/decnet/dn_table.c
@@ -30,6 +30,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -74,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f
 for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
 
 #define RT_TABLE_MIN 1
-
+#define DN_FIB_TABLE_HASHSZ 256
+static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
 static DEFINE_RWLOCK(dn_fib_tables_lock);
-struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
 
 static kmem_cache_t *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
@@ -263,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg 
 }
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-                        u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+                        u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
                         struct dn_fib_info *fi, unsigned int flags)
 {
         struct rtmsg *rtm;
@@ -277,6 +278,7 @@ static int dn_fib_dump_info(struct sk_bu
         rtm->rtm_src_len = 0;
         rtm->rtm_tos = 0;
         rtm->rtm_table = tb_id;
+	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
         rtm->rtm_flags = fi->fib_flags;
         rtm->rtm_scope = scope;
 	rtm->rtm_type  = type;
@@ -326,29 +328,29 @@ rtattr_failure:
 }
 
 
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
                         struct nlmsghdr *nlh, struct netlink_skb_parms *req)
 {
         struct sk_buff *skb;
         u32 pid = req ? req->pid : 0;
-        int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+	int err = -ENOBUFS;
 
-        skb = alloc_skb(size, GFP_KERNEL);
-        if (!skb)
-                return;
-
-        if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 
-                                f->fn_type, f->fn_scope, &f->fn_key, z, 
-                                DN_FIB_INFO(f), 0) < 0) {
+        skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+        if (skb == NULL)
+		goto errout;
+
+        err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+			       f->fn_type, f->fn_scope, &f->fn_key, z,
+			       DN_FIB_INFO(f), 0);
+	if (err < 0) {
                 kfree_skb(skb);
-                return;
+		goto errout;
         }
-        NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                atomic_inc(&skb->users);
-        netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, 
@@ -359,7 +361,7 @@ static __inline__ int dn_hash_dump_bucke
 {
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	for(i = 0; f; i++, f = f->fn_next) {
 		if (i < s_i)
 			continue;
@@ -372,11 +374,11 @@ static __inline__ int dn_hash_dump_bucke
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order, 
 				f->fn_info, NLM_F_MULTI) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -387,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for(h = 0; h < dz->dz_divisor; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
 			continue;
 		if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -411,26 +413,63 @@ static int dn_fib_table_dump(struct dn_f
 	struct dn_zone *dz;
 	struct dn_hash *table = (struct dn_hash *)tb->data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&dn_fib_tables_lock);
 	for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&dn_fib_tables_lock);
 			return -1;
 		}
 	}
 	read_unlock(&dn_fib_tables_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 
         return skb->len;
 }
 
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct dn_fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+			return dn_cache_dump(skb, cb);
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
 static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct dn_hash *table = (struct dn_hash *)tb->data;
@@ -739,9 +778,11 @@ out:
 }
 
 
-struct dn_fib_table *dn_fib_get_table(int n, int create)
+struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
         struct dn_fib_table *t;
+	struct hlist_node *node;
+	unsigned int h;
 
         if (n < RT_TABLE_MIN)
                 return NULL;
@@ -749,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(in
         if (n > RT_TABLE_MAX)
                 return NULL;
 
-        if (dn_fib_tables[n]) 
-                return dn_fib_tables[n];
+	h = n & (DN_FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+		if (t->n == n) {
+			rcu_read_unlock();
+			return t;
+		}
+	}
+	rcu_read_unlock();
 
         if (!create)
                 return NULL;
@@ -771,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(in
         t->flush  = dn_fib_table_flush;
         t->dump = dn_fib_table_dump;
 	memset(t->data, 0, sizeof(struct dn_hash));
-        dn_fib_tables[n] = t;
+	hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
 
         return t;
 }
 
-static void dn_fib_del_tree(int n)
-{
-	struct dn_fib_table *t;
-
-	write_lock(&dn_fib_tables_lock);
-	t = dn_fib_tables[n];
-	dn_fib_tables[n] = NULL;
-	write_unlock(&dn_fib_tables_lock);
-
-	kfree(t);
-}
-
 struct dn_fib_table *dn_fib_empty_table(void)
 {
-        int id;
+        u32 id;
 
         for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
-                if (dn_fib_tables[id] == NULL)
+		if (dn_fib_get_table(id, 0) == NULL)
                         return dn_fib_get_table(id, 1);
         return NULL;
 }
 
+void dn_fib_flush(void)
+{
+        int flushed = 0;
+        struct dn_fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+	                flushed += tb->flush(tb);
+        }
+
+        if (flushed)
+                dn_rt_cache_flush(-1);
+}
+
 void __init dn_fib_table_init(void)
 {
 	dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
@@ -808,10 +860,17 @@ void __init dn_fib_table_init(void)
 
 void __exit dn_fib_table_cleanup(void)
 {
-	int i;
-
-	for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
-		dn_fib_del_tree(i);
+	struct dn_fib_table *t;
+	struct hlist_node *node, *next;
+	unsigned int h;
 
-	return;
+	write_lock(&dn_fib_tables_lock);
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+		                          hlist) {
+			hlist_del(&t->hlist);
+			kfree(t);
+		}
+	}
+	write_unlock(&dn_fib_tables_lock);
 }
diff -puN net/ipv4/Kconfig~git-net net/ipv4/Kconfig
--- a/net/ipv4/Kconfig~git-net
+++ a/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
 	---help---
 	  Normally, a router decides what to do with a received packet based
 	  solely on the packet's final destination address. If you say Y here,
diff -puN net/ipv4/Makefile~git-net net/ipv4/Makefile
--- a/net/ipv4/Makefile~git-net
+++ a/net/ipv4/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vega
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		      xfrm4_output.o
diff -puN net/ipv4/af_inet.c~git-net net/ipv4/af_inet.c
--- a/net/ipv4/af_inet.c~git-net
+++ a/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock)
 }
 
 /* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet
  *      Shall we try to damage output packets if routing dev changes?
  */
 
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
 
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *
 		},
 	};
 						
+	security_sk_classify_flow(sk, &fl);
 	err = ip_route_output_flow(&rt, &fl, sk, 0);
 }
 	if (!err)
diff -puN net/ipv4/ah4.c~git-net net/ipv4/ah4.c
--- a/net/ipv4/ah4.c~git-net
+++ a/net/ipv4/ah4.c
@@ -34,7 +34,7 @@ static int ip_clear_mutable_options(stru
 		switch (*optptr) {
 		case IPOPT_SEC:
 		case 0x85:	/* Some "Extended Security" crap. */
-		case 0x86:	/* Another "Commercial Security" crap. */
+		case IPOPT_CIPSO:
 		case IPOPT_RA:
 		case 0x80|21:	/* RFC1770 */
 			break;
diff -puN /dev/null net/ipv4/cipso_ipv4.c
--- /dev/null
+++ a/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+struct cipso_v4_domhsh_entry {
+	char *domain;
+	u32 valid;
+	struct list_head list;
+	struct rcu_head rcu;
+};
+
+/* List of available DOI definitions */
+/* XXX - Updates should be minimal so having a single lock for the
+ * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
+ * okay. */
+/* XXX - This currently assumes a minimal number of different DOIs in use,
+ * if in practice there are a lot of different DOIs this list should
+ * probably be turned into a hash table or something similar so we
+ * can do quick lookups. */
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+
+/* Label mapping cache */
+int cipso_v4_cache_enabled = 1;
+int cipso_v4_cache_bucketsize = 10;
+#define CIPSO_V4_CACHE_BUCKETBITS     7
+#define CIPSO_V4_CACHE_BUCKETS        (1 << CIPSO_V4_CACHE_BUCKETBITS)
+#define CIPSO_V4_CACHE_REORDERLIMIT   10
+struct cipso_v4_map_cache_bkt {
+	spinlock_t lock;
+	u32 size;
+	struct list_head list;
+};
+struct cipso_v4_map_cache_entry {
+	u32 hash;
+	unsigned char *key;
+	size_t key_len;
+
+	struct netlbl_lsm_cache lsm_data;
+
+	u32 activity;
+	struct list_head list;
+};
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+/* Restricted bitmap (tag #1) flags */
+int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_strictvalid = 1;
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
+ * @bitmap: the bitmap
+ * @bitmap_len: length in bits
+ * @offset: starting offset
+ * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
+ *
+ * Description:
+ * Starting at @offset, walk the bitmap from left to right until either the
+ * desired bit is found or we reach the end.  Return the bit offset, -1 if
+ * not found, or -2 if error.
+ */
+static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
+				u32 bitmap_len,
+				u32 offset,
+				u8 state)
+{
+	u32 bit_spot;
+	u32 byte_offset;
+	unsigned char bitmask;
+	unsigned char byte;
+
+	/* gcc always rounds to zero when doing integer division */
+	byte_offset = offset / 8;
+	byte = bitmap[byte_offset];
+	bit_spot = offset;
+	bitmask = 0x80 >> (offset % 8);
+
+	while (bit_spot < bitmap_len) {
+		if ((state && (byte & bitmask) == bitmask) ||
+		    (state == 0 && (byte & bitmask) == 0))
+			return bit_spot;
+
+		bit_spot++;
+		bitmask >>= 1;
+		if (bitmask == 0) {
+			byte = bitmap[++byte_offset];
+			bitmask = 0x80;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
+ * @bitmap: the bitmap
+ * @bit: the bit
+ * @state: if non-zero, set the bit (1) else clear the bit (0)
+ *
+ * Description:
+ * Set a single bit in the bitmask.  Returns zero on success, negative values
+ * on error.
+ */
+static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
+				   u32 bit,
+				   u8 state)
+{
+	u32 byte_spot;
+	u8 bitmask;
+
+	/* gcc always rounds to zero when doing integer division */
+	byte_spot = bit / 8;
+	bitmask = 0x80 >> (bit % 8);
+	if (state)
+		bitmap[byte_spot] |= bitmask;
+	else
+		bitmap[byte_spot] &= ~bitmask;
+}
+
+/**
+ * cipso_v4_doi_domhsh_free - Frees a domain list entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a domain list entry can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
+{
+	struct cipso_v4_domhsh_entry *ptr;
+
+	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
+	kfree(ptr->domain);
+	kfree(ptr);
+}
+
+/**
+ * cipso_v4_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * This function frees the memory associated with a cache entry.
+ *
+ */
+static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
+{
+	if (entry->lsm_data.free)
+		entry->lsm_data.free(entry->lsm_data.data);
+	kfree(entry->key);
+	kfree(entry);
+}
+
+/**
+ * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * The CIPSO tag hashing function.  Returns a 32-bit hash value.
+ *
+ */
+static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+	return jhash(key, key_len, 0);
+}
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+/**
+ * cipso_v4_cache_init - Initialize the CIPSO cache
+ *
+ * Description:
+ * Initializes the CIPSO label mapping cache, this function should be called
+ * before any of the other functions defined in this file.  Returns zero on
+ * success, negative values on error.
+ *
+ */
+static int cipso_v4_cache_init(void)
+{
+	u32 iter;
+
+	cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
+				 sizeof(struct cipso_v4_map_cache_bkt),
+				 GFP_KERNEL);
+	if (cipso_v4_cache == NULL)
+		return -ENOMEM;
+
+	for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+		spin_lock_init(&cipso_v4_cache[iter].lock);
+		cipso_v4_cache[iter].size = 0;
+		INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
+	}
+
+	return 0;
+}
+
+/**
+ * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
+ *
+ * Description:
+ * Invalidates and frees any entries in the CIPSO cache.  Returns zero on
+ * success and negative values on failure.
+ *
+ */
+void cipso_v4_cache_invalidate(void)
+{
+	struct cipso_v4_map_cache_entry *entry, *tmp_entry;
+	u32 iter;
+
+	for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+		spin_lock(&cipso_v4_cache[iter].lock);
+		list_for_each_entry_safe(entry,
+					 tmp_entry,
+					 &cipso_v4_cache[iter].list, list) {
+			list_del(&entry->list);
+			cipso_v4_cache_entry_free(entry);
+		}
+		cipso_v4_cache[iter].size = 0;
+		spin_unlock(&cipso_v4_cache[iter].lock);
+	}
+
+	return;
+}
+
+/**
+ * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key.  If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes.  The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ *  1. The cache entry's activity counter is incremented
+ *  2. The previous (higher ranking) entry's activity counter is decremented
+ *  3. If the difference between the two activity counters is geater than
+ *     CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ *
+ */
+static int cipso_v4_cache_check(const unsigned char *key,
+				u32 key_len,
+				struct netlbl_lsm_secattr *secattr)
+{
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry;
+	struct cipso_v4_map_cache_entry *prev_entry = NULL;
+	u32 hash;
+
+	if (!cipso_v4_cache_enabled)
+		return -ENOENT;
+
+	hash = cipso_v4_map_cache_hash(key, key_len);
+	bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
+		if (entry->hash == hash &&
+		    entry->key_len == key_len &&
+		    memcmp(entry->key, key, key_len) == 0) {
+			entry->activity += 1;
+			secattr->cache.free = entry->lsm_data.free;
+			secattr->cache.data = entry->lsm_data.data;
+			if (prev_entry == NULL) {
+				spin_unlock(&cipso_v4_cache[bkt].lock);
+				return 0;
+			}
+
+			if (prev_entry->activity > 0)
+				prev_entry->activity -= 1;
+			if (entry->activity > prev_entry->activity &&
+			    entry->activity - prev_entry->activity >
+			    CIPSO_V4_CACHE_REORDERLIMIT) {
+				__list_del(entry->list.prev, entry->list.next);
+				__list_add(&entry->list,
+					   prev_entry->list.prev,
+					   &prev_entry->list);
+			}
+
+			spin_unlock(&cipso_v4_cache[bkt].lock);
+			return 0;
+		}
+		prev_entry = entry;
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_cache_add - Add an entry to the CIPSO cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CIPSO label mapping cache.  Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first.  It is important to note that there is
+ * currently no checking for duplicate keys.  Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry = NULL;
+	struct cipso_v4_map_cache_entry *old_entry = NULL;
+	unsigned char *cipso_ptr;
+	u32 cipso_ptr_len;
+
+	if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+		return 0;
+
+	cipso_ptr = CIPSO_V4_OPTPTR(skb);
+	cipso_ptr_len = cipso_ptr[1];
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+	if (entry->key == NULL) {
+		ret_val = -ENOMEM;
+		goto cache_add_failure;
+	}
+	memcpy(entry->key, cipso_ptr, cipso_ptr_len);
+	entry->key_len = cipso_ptr_len;
+	entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
+	entry->lsm_data.free = secattr->cache.free;
+	entry->lsm_data.data = secattr->cache.data;
+
+	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache[bkt].size += 1;
+	} else {
+		old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
+				       struct cipso_v4_map_cache_entry, list);
+		list_del(&old_entry->list);
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache_entry_free(old_entry);
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return 0;
+
+cache_add_failure:
+	if (entry)
+		cipso_v4_cache_entry_free(entry);
+	return ret_val;
+}
+
+/*
+ * DOI List Functions
+ */
+
+/**
+ * cipso_v4_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Search the DOI definition list for a DOI definition with a DOI value that
+ * matches @doi.  The caller is responsibile for calling rcu_read_[un]lock().
+ * Returns a pointer to the DOI definition on success and NULL on failure.
+ */
+static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
+{
+	struct cipso_v4_doi *iter;
+
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->doi == doi && iter->valid)
+			return iter;
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
+ * @doi_def: the DOI structure
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CIPSO engine and calls this
+ * function to add it to the list of acceptable domains.  The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see cipso_ipv4.h for details).  Returns
+ * zero on success and non-zero on failure.
+ *
+ */
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+		return -EINVAL;
+
+	doi_def->valid = 1;
+	INIT_RCU_HEAD(&doi_def->rcu);
+	INIT_LIST_HEAD(&doi_def->dom_list);
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)
+		goto doi_add_failure_rlock;
+	spin_lock(&cipso_v4_doi_list_lock);
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)
+		goto doi_add_failure_slock;
+	list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+
+doi_add_failure_slock:
+	spin_unlock(&cipso_v4_doi_list_lock);
+doi_add_failure_rlock:
+	rcu_read_unlock();
+	return -EEXIST;
+}
+
+/**
+ * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
+ * @doi: the DOI value
+ * @callback: the DOI cleanup/free callback
+ *
+ * Description:
+ * Removes a DOI definition from the CIPSO engine, @callback is called to
+ * free any memory.  The NetLabel routines will be called to release their own
+ * LSM domain mappings as well as our own domain list.  Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
+{
+	struct cipso_v4_doi *doi_def;
+	struct cipso_v4_domhsh_entry *dom_iter;
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi) != NULL) {
+		spin_lock(&cipso_v4_doi_list_lock);
+		doi_def = cipso_v4_doi_search(doi);
+		if (doi_def == NULL) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			return -ENOENT;
+		}
+		doi_def->valid = 0;
+		list_del_rcu(&doi_def->list);
+		spin_unlock(&cipso_v4_doi_list_lock);
+		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
+			if (dom_iter->valid)
+				netlbl_domhsh_remove(dom_iter->domain);
+		cipso_v4_cache_invalidate();
+		rcu_read_unlock();
+
+		call_rcu(&doi_def->rcu, callback);
+		return 0;
+	}
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Searches for a valid DOI definition and if one is found it is returned to
+ * the caller.  Otherwise NULL is returned.  The caller must ensure that
+ * rcu_read_lock() is held while accessing the returned definition.
+ *
+ */
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return cipso_v4_doi_search(doi);
+}
+
+/**
+ * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Dump a list of all the configured DOI values into a sk_buff.  The returned
+ * sk_buff has room at the front of the sk_buff for @headroom bytes.  See
+ * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format.  This
+ * function may fail if another process is changing the DOI list at the same
+ * time.  Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 doi_cnt = 0;
+	ssize_t buf_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			doi_cnt += 1;
+			buf_len += 2 * NETLBL_LEN_U32;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_all_failure;
+
+	if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
+		goto doi_dump_all_failure;
+	buf_len -= NETLBL_LEN_U32;
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+				goto doi_dump_all_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_all_failure:
+	rcu_read_unlock();
+	kfree(skb);
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
+ * @doi: the DOI value
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Lookup the DOI definition matching @doi and dump it's contents into a
+ * sk_buff.  The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes.  See net/netlabel/netlabel_cipso_v4.h for the LIST message
+ * format.  This function may fail if another process is changing the DOI list
+ * at the same time.  Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 tag_cnt = 0;
+	u32 lvl_cnt = 0;
+	u32 cat_cnt = 0;
+	ssize_t buf_len;
+	ssize_t tmp;
+
+	rcu_read_lock();
+	iter = cipso_v4_doi_getdef(doi);
+	if (iter == NULL)
+		goto doi_dump_failure;
+	buf_len = NETLBL_LEN_U32;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		buf_len += NETLBL_LEN_U32;
+		while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		      iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		buf_len += 3 * NETLBL_LEN_U32;
+		while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		       iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				lvl_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				cat_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+		goto doi_dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	if (iter != cipso_v4_doi_getdef(doi))
+		goto doi_dump_failure;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= 3 * NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u8(skb,
+					   NLA_U8,
+					   iter->map.std->lvl.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u16(skb,
+					   NLA_U16,
+					   iter->map.std->cat.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_failure:
+	rcu_read_unlock();
+	kfree(skb);
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to add
+ *
+ * Description:
+ * Adds the @domain to the the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel).  This function
+ * does allocate memory.  Returns zero on success, negative values on failure.
+ *
+ */
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+	struct cipso_v4_domhsh_entry *new_dom;
+
+	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
+	if (new_dom == NULL)
+		return -ENOMEM;
+	if (domain) {
+		new_dom->domain = kstrdup(domain, GFP_KERNEL);
+		if (new_dom->domain == NULL) {
+			kfree(new_dom);
+			return -ENOMEM;
+		}
+	}
+	new_dom->valid = 1;
+	INIT_RCU_HEAD(&new_dom->rcu);
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			kfree(new_dom->domain);
+			kfree(new_dom);
+			return -EEXIST;
+		}
+	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes the @domain from the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel).   Returns zero
+ * on success and negative values on error.
+ *
+ */
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			iter->valid = 0;
+			list_del_rcu(&iter->list);
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
+
+			return 0;
+		}
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/*
+ * Label Mapping Functions
+ */
+
+/**
+ * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
+ * @doi_def: the DOI definition
+ * @level: the level to check
+ *
+ * Description:
+ * Checks the given level against the given DOI definition and returns a
+ * negative value if the level does not have a valid mapping and a zero value
+ * if the level is defined by the DOI.
+ *
+ */
+static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
+ * @doi_def: the DOI definition
+ * @host_lvl: the host MLS level
+ * @net_lvl: the network/CIPSO MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS level to the correct
+ * CIPSO level using the given DOI definition.  Returns zero on success,
+ * negative values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
+				 u32 host_lvl,
+				 u32 *net_lvl)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		*net_lvl = host_lvl;
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		if (host_lvl < doi_def->map.std->lvl.local_size) {
+			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
+			return 0;
+		}
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
+ * @doi_def: the DOI definition
+ * @net_lvl: the network/CIPSO MLS level
+ * @host_lvl: the host MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO level to the correct local MLS
+ * level using the given DOI definition.  Returns zero on success, negative
+ * values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
+				 u32 net_lvl,
+				 u32 *host_lvl)
+{
+	struct cipso_v4_std_map_tbl *map_tbl;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		*host_lvl = net_lvl;
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		map_tbl = doi_def->map.std;
+		if (net_lvl < map_tbl->lvl.cipso_size &&
+		    map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
+			*host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
+			return 0;
+		}
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
+ * @doi_def: the DOI definition
+ * @bitmap: category bitmap
+ * @bitmap_len: bitmap length in bytes
+ *
+ * Description:
+ * Checks the given category bitmap against the given DOI definition and
+ * returns a negative value if any of the categories in the bitmap do not have
+ * a valid mapping and a zero value if all of the categories are valid.
+ *
+ */
+static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
+				      const unsigned char *bitmap,
+				      u32 bitmap_len)
+{
+	int cat = -1;
+	u32 bitmap_len_bits = bitmap_len * 8;
+	u32 cipso_cat_size = doi_def->map.std->cat.cipso_size;
+	u32 *cipso_array = doi_def->map.std->cat.cipso;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		for (;;) {
+			cat = cipso_v4_bitmap_walk(bitmap,
+						   bitmap_len_bits,
+						   cat + 1,
+						   1);
+			if (cat < 0)
+				break;
+			if (cat >= cipso_cat_size ||
+			    cipso_array[cat] >= CIPSO_V4_INV_CAT)
+				return -EFAULT;
+		}
+
+		if (cat == -1)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @host_cat: the category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ * @net_cat: the zero'd out category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CIPSO bitmap using the given DOI definition.  Returns the minimum
+ * size in bytes of the network bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *host_cat,
+				     u32 host_cat_len,
+				     unsigned char *net_cat,
+				     u32 net_cat_len)
+{
+	int host_spot = -1;
+	u32 net_spot;
+	u32 net_spot_max = 0;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_cat_size = doi_def->map.std->cat.local_size;
+	u32 *host_cat_array = doi_def->map.std->cat.local;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		net_spot_max = host_cat_len - 1;
+		while (net_spot_max > 0 && host_cat[net_spot_max] == 0)
+			net_spot_max--;
+		if (net_spot_max > net_cat_len)
+			return -EINVAL;
+		memcpy(net_cat, host_cat, net_spot_max);
+		return net_spot_max;
+	case CIPSO_V4_MAP_STD:
+		for (;;) {
+			host_spot = cipso_v4_bitmap_walk(host_cat,
+							 host_clen_bits,
+							 host_spot + 1,
+							 1);
+			if (host_spot < 0)
+				break;
+			if (host_spot >= host_cat_size)
+				return -EPERM;
+
+			net_spot = host_cat_array[host_spot];
+			if (net_spot >= net_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
+
+			if (net_spot > net_spot_max)
+				net_spot_max = net_spot;
+		}
+
+		if (host_spot == -2)
+			return -EFAULT;
+
+		if (++net_spot_max % 8)
+			return net_spot_max / 8 + 1;
+		return net_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ * @host_cat: the zero'd out category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO bitmap to the correct local
+ * MLS category bitmap using the given DOI definition.  Returns the minimum
+ * size in bytes of the host bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *net_cat,
+				     u32 net_cat_len,
+				     unsigned char *host_cat,
+				     u32 host_cat_len)
+{
+	u32 host_spot;
+	u32 host_spot_max = 0;
+	int net_spot = -1;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_cat_size = doi_def->map.std->cat.cipso_size;
+	u32 *net_cat_array = doi_def->map.std->cat.cipso;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (net_cat_len > host_cat_len)
+			return -EINVAL;
+		memcpy(host_cat, net_cat, net_cat_len);
+		return net_cat_len;
+	case CIPSO_V4_MAP_STD:
+		for (;;) {
+			net_spot = cipso_v4_bitmap_walk(net_cat,
+							net_clen_bits,
+							net_spot + 1,
+							1);
+			if (net_spot < 0)
+				break;
+			if (net_spot >= net_cat_size ||
+			    net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
+				return -EPERM;
+
+			host_spot = net_cat_array[net_spot];
+			if (host_spot >= host_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+
+			if (host_spot > host_spot_max)
+				host_spot_max = host_spot;
+		}
+
+		if (net_spot == -2)
+			return -EFAULT;
+
+		if (++host_spot_max % 8)
+			return host_spot_max / 8 + 1;
+		return host_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Protocol Handling Functions
+ */
+
+#define CIPSO_V4_HDR_LEN              6
+
+/**
+ * cipso_v4_gentag_hdr - Generate a CIPSO option header
+ * @doi_def: the DOI definition
+ * @len: the total tag length in bytes
+ * @buf: the CIPSO option buffer
+ *
+ * Description:
+ * Write a CIPSO header into the beginning of @buffer.  Return zero on success,
+ * negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+			       u32 len,
+			       unsigned char *buf)
+{
+	if (CIPSO_V4_HDR_LEN + len > 40)
+		return -ENOSPC;
+
+	buf[0] = IPOPT_CIPSO;
+	buf[1] = CIPSO_V4_HDR_LEN + len;
+	*(u32 *)&buf[2] = htonl(doi_def->doi);
+
+	return 0;
+}
+
+#define CIPSO_V4_TAG1_CAT_LEN         30
+
+/**
+ * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the restricted bitmap tag, tag type #1.  The
+ * actual buffer length may be larger than the indicated size due to
+ * translation between host and network category bitmaps.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char **buffer,
+			       u32 *buffer_len)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 level;
+
+	if (secattr->mls_cat) {
+		buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
+			      GFP_ATOMIC);
+		if (buf == NULL)
+			return -ENOMEM;
+
+		ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len,
+						    &buf[CIPSO_V4_HDR_LEN + 4],
+						    CIPSO_V4_TAG1_CAT_LEN);
+		if (ret_val < 0)
+			goto gentag_failure;
+
+		/* This will send packets using the "optimized" format when
+		 * possibile as specified in  section 3.4.2.6 of the
+		 * CIPSO draft. */
+		if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
+			ret_val = 10;
+
+		buf_len = 4 + ret_val;
+	} else {
+		buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
+		if (buf == NULL)
+			return -ENOMEM;
+		buf_len = 4;
+	}
+
+	ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	buf[CIPSO_V4_HDR_LEN] = 0x01;
+	buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
+	buf[CIPSO_V4_HDR_LEN + 3] = level;
+
+	*buffer = buf;
+	*buffer_len = CIPSO_V4_HDR_LEN + buf_len;
+
+	return 0;
+
+gentag_failure:
+	kfree(buf);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr.  Return zero on success, negatives values on
+ * failure.
+ *
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	u8 tag_len = tag[1];
+	u32 level;
+
+	ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+	if (ret_val != 0)
+		return ret_val;
+	secattr->mls_lvl = level;
+	secattr->mls_lvl_vld = 1;
+
+	if (tag_len > 4) {
+		switch (doi_def->type) {
+		case CIPSO_V4_MAP_PASS:
+			secattr->mls_cat_len = tag_len - 4;
+			break;
+		case CIPSO_V4_MAP_STD:
+			secattr->mls_cat_len =
+				doi_def->map.std->cat.local_size;
+			break;
+		}
+		secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+		if (secattr->mls_cat == NULL)
+			return -ENOMEM;
+
+		ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
+						    &tag[4],
+						    tag_len - 4,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len);
+		if (ret_val < 0) {
+			kfree(secattr->mls_cat);
+			return ret_val;
+		}
+		secattr->mls_cat_len = ret_val;
+	}
+
+	return 0;
+}
+
+/**
+ * cipso_v4_validate - Validate a CIPSO option
+ * @option: the start of the option, on error it is set to point to the error
+ *
+ * Description:
+ * This routine is called to validate a CIPSO option, it checks all of the
+ * fields to ensure that they are at least valid, see the draft snippet below
+ * for details.  If the option is valid then a zero value is returned and
+ * the value of @option is unchanged.  If the option is invalid then a
+ * non-zero value is returned and @option is adjusted to point to the
+ * offending portion of the option.  From the IETF draft ...
+ *
+ *  "If any field within the CIPSO options, such as the DOI identifier, is not
+ *   recognized the IP datagram is discarded and an ICMP 'parameter problem'
+ *   (type 12) is generated and returned.  The ICMP code field is set to 'bad
+ *   parameter' (code 0) and the pointer is set to the start of the CIPSO field
+ *   that is unrecognized."
+ *
+ */
+int cipso_v4_validate(unsigned char **option)
+{
+	unsigned char *opt = *option;
+	unsigned char *tag;
+	unsigned char opt_iter;
+	unsigned char err_offset = 0;
+	u8 opt_len;
+	u8 tag_len;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 tag_iter;
+
+	/* caller already checks for length values that are too large */
+	opt_len = opt[1];
+	if (opt_len < 8) {
+		err_offset = 1;
+		goto validate_return;
+	}
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+	if (doi_def == NULL) {
+		err_offset = 2;
+		goto validate_return_locked;
+	}
+
+	opt_iter = 6;
+	tag = opt + opt_iter;
+	while (opt_iter < opt_len) {
+		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
+			if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
+			    ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+
+		tag_len = tag[1];
+		if (tag_len > (opt_len - opt_iter)) {
+			err_offset = opt_iter + 1;
+			goto validate_return_locked;
+		}
+
+		switch (tag[0]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			if (tag_len < 4) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+
+			/* We are already going to do all the verification
+			 * necessary at the socket layer so from our point of
+			 * view it is safe to turn these checks off (and less
+			 * work), however, the CIPSO draft says we should do
+			 * all the CIPSO validations here but it doesn't
+			 * really specify _exactly_ what we need to validate
+			 * ... so, just make it a sysctl tunable. */
+			if (cipso_v4_rbm_strictvalid) {
+				if (cipso_v4_map_lvl_valid(doi_def,
+							   tag[3]) < 0) {
+					err_offset = opt_iter + 3;
+					goto validate_return_locked;
+				}
+				if (tag_len > 4 &&
+				    cipso_v4_map_cat_rbm_valid(doi_def,
+							    &tag[4],
+							    tag_len - 4) < 0) {
+					err_offset = opt_iter + 4;
+					goto validate_return_locked;
+				}
+			}
+			break;
+		default:
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
+
+		tag += tag_len;
+		opt_iter += tag_len;
+	}
+
+validate_return_locked:
+	rcu_read_unlock();
+validate_return:
+	*option = opt + err_offset;
+	return err_offset;
+}
+
+/**
+ * cipso_v4_error - Send the correct reponse for a bad packet
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: CIPSO gateway flag
+ *
+ * Description:
+ * Based on the error code given in @error, send an ICMP error message back to
+ * the originating host.  From the IETF draft ...
+ *
+ *  "If the contents of the CIPSO [option] are valid but the security label is
+ *   outside of the configured host or port label range, the datagram is
+ *   discarded and an ICMP 'destination unreachable' (type 3) is generated and
+ *   returned.  The code field of the ICMP is set to 'communication with
+ *   destination network administratively prohibited' (code 9) or to
+ *   'communication with destination host administratively prohibited'
+ *   (code 10).  The value of the code is dependent on whether the originator
+ *   of the ICMP message is acting as a CIPSO host or a CIPSO gateway.  The
+ *   recipient of the ICMP message MUST be able to handle either value.  The
+ *   same procedure is performed if a CIPSO [option] can not be added to an
+ *   IP packet because it is too large to fit in the IP options area."
+ *
+ *  "If the error is triggered by receipt of an ICMP message, the message is
+ *   discarded and no response is permitted (consistent with general ICMP
+ *   processing rules)."
+ *
+ */
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
+{
+	if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+		return;
+
+	if (gateway)
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+	else
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+}
+
+/**
+ * cipso_v4_socket_setattr - Add a CIPSO option to a socket
+ * @sock: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function.  This function requires
+ * exclusive access to @sock->sk, which means it either needs to be in the
+ * process of being created or locked via lock_sock(sock->sk).  Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 iter;
+	unsigned char *buf = NULL;
+	u32 buf_len = 0;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct sock *sk;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	sk = sock->sk;
+	if (sk == NULL)
+		return 0;
+
+	/* XXX - This code assumes only one tag per CIPSO option which isn't
+	 * really a good assumption to make but since we only support the MAC
+	 * tags right now it is a safe assumption. */
+	iter = 0;
+	do {
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			ret_val = cipso_v4_gentag_rbm(doi_def,
+						      secattr,
+						      &buf,
+						      &buf_len);
+			break;
+		default:
+			ret_val = -EPERM;
+			goto socket_setattr_failure;
+		}
+
+		iter++;
+	} while (ret_val != 0 &&
+		 iter < CIPSO_V4_TAG_MAXCNT &&
+		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	/* We can't use ip_options_get() directly because it makes a call to
+	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
+	 * we can't block here. */
+	opt_len = (buf_len + 3) & ~3;
+	opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
+	if (opt == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+	memcpy(opt->__data, buf, buf_len);
+	opt->optlen = opt_len;
+	opt->is_data = 1;
+	kfree(buf);
+	buf = NULL;
+	ret_val = ip_options_compile(opt, NULL);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	sk_inet = inet_sk(sk);
+	if (sk_inet->is_icsk) {
+		sk_conn = inet_csk(sk);
+		if (sk_inet->opt)
+			sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
+		sk_conn->icsk_ext_hdr_len += opt->optlen;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+	opt = xchg(&sk_inet->opt, opt);
+	kfree(opt);
+
+	return 0;
+
+socket_setattr_failure:
+	kfree(buf);
+	kfree(opt);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr.  Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	struct sock *sk;
+	struct inet_sock *sk_inet;
+	unsigned char *cipso_ptr;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	sk = sock->sk;
+	lock_sock(sk);
+	sk_inet = inet_sk(sk);
+	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
+		goto socket_getattr_return;
+	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
+		sizeof(struct iphdr);
+	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
+	if (ret_val == 0)
+		goto socket_getattr_return;
+
+	doi = ntohl(*(u32 *)&cipso_ptr[2]);
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def == NULL) {
+		rcu_read_unlock();
+		goto socket_getattr_return;
+	}
+	switch (cipso_ptr[6]) {
+	case CIPSO_V4_TAG_RBITMAP:
+		ret_val = cipso_v4_parsetag_rbm(doi_def,
+						&cipso_ptr[6],
+						secattr);
+		break;
+	}
+	rcu_read_unlock();
+
+socket_getattr_return:
+	release_sock(sk);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse the given packet's CIPSO option and return the security attributes.
+ * Returns zero on success and negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	unsigned char *cipso_ptr;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	if (!CIPSO_V4_OPTEXIST(skb))
+		return -ENOMSG;
+	cipso_ptr = CIPSO_V4_OPTPTR(skb);
+	if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
+		return 0;
+
+	doi = ntohl(*(u32 *)&cipso_ptr[2]);
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def == NULL)
+		goto skbuff_getattr_return;
+	switch (cipso_ptr[6]) {
+	case CIPSO_V4_TAG_RBITMAP:
+		ret_val = cipso_v4_parsetag_rbm(doi_def,
+						&cipso_ptr[6],
+						secattr);
+		break;
+	}
+
+skbuff_getattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * cipso_v4_init - Initialize the CIPSO module
+ *
+ * Description:
+ * Initialize the CIPSO module and prepare it for use.  Returns zero on success
+ * and negative values on failure.
+ *
+ */
+static int __init cipso_v4_init(void)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_cache_init();
+	if (ret_val != 0)
+		panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
+		      ret_val);
+
+	return 0;
+}
+
+subsys_initcall(cipso_v4_init);
diff -puN net/ipv4/devinet.c~git-net net/ipv4/devinet.c
--- a/net/ipv4/devinet.c~git-net
+++ a/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/if_addr.h>
 #include <linux/if_ether.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -62,6 +63,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/netlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.accept_redirects = 1,
@@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_
 	.accept_source_route = 1,
 };
 
-static void rtmsg_ifa(int event, struct in_ifaddr *);
+static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+	[IFA_LOCAL]     	= { .type = NLA_U32 },
+	[IFA_ADDRESS]   	= { .type = NLA_U32 },
+	[IFA_BROADCAST] 	= { .type = NLA_U32 },
+	[IFA_ANYCAST]   	= { .type = NLA_U32 },
+	[IFA_LABEL]     	= { .type = NLA_STRING },
+};
+
+static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *i
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-			 int destroy)
+static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy, struct nlmsghdr *nlh, u32 pid)
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_devic
 			if (!do_promote) {
 				*ifap1 = ifa->ifa_next;
 
-				rtmsg_ifa(RTM_DELADDR, ifa);
+				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
 				blocking_notifier_call_chain(&inetaddr_chain,
 						NETDEV_DOWN, ifa);
 				inet_free_ifa(ifa);
@@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_devic
 	   is valid, it will try to restore deleted routes... Grr.
 	   So that, this order is correct.
 	 */
-	rtmsg_ifa(RTM_DELADDR, ifa1);
+	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
@@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_devic
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
-		rtmsg_ifa(RTM_NEWADDR, promote);
+		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_devic
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy)
+{
+	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
+}
+
+static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
+			     u32 pid)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifa
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
-	rtmsg_ifa(RTM_NEWADDR, ifa);
+	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
 }
 
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+	return __inet_insert_ifa(ifa, NULL, 0);
+}
+
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(stru
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
 	struct in_ifaddr *ifa, **ifap;
+	int err = -EINVAL;
 
 	ASSERT_RTNL();
 
-	if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
-		goto out;
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	ifm = nlmsg_data(nlh);
+	in_dev = inetdev_by_index(ifm->ifa_index);
+	if (in_dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
 	__in_dev_put(in_dev);
 
 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 	     ifap = &ifa->ifa_next) {
-		if ((rta[IFA_LOCAL - 1] &&
-		     memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
-			    &ifa->ifa_local, 4)) ||
-		    (rta[IFA_LABEL - 1] &&
-		     rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
-		    (rta[IFA_ADDRESS - 1] &&
-		     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
-		      !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
-			      	      ifa))))
+		if (tb[IFA_LOCAL] &&
+		    ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
 			continue;
-		inet_del_ifa(in_dev, ifap, 1);
+
+		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+			continue;
+
+		if (tb[IFA_ADDRESS] &&
+		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+		    !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
+			continue;
+
+		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 		return 0;
 	}
-out:
-	return -EADDRNOTAVAIL;
+
+	err = -EADDRNOTAVAIL;
+errout:
+	return err;
 }
 
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
 {
-	struct rtattr **rta = arg;
+	struct nlattr *tb[IFA_MAX+1];
+	struct in_ifaddr *ifa;
+	struct ifaddrmsg *ifm;
 	struct net_device *dev;
 	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
-	struct in_ifaddr *ifa;
-	int rc = -EINVAL;
+	int err = -EINVAL;
 
-	ASSERT_RTNL();
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
 
-	if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
-		goto out;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+		goto errout;
 
-	rc = -ENODEV;
-	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
-		goto out;
+	dev = __dev_get_by_index(ifm->ifa_index);
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
 
-	rc = -ENOBUFS;
-	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+	in_dev = __in_dev_get_rtnl(dev);
+	if (in_dev == NULL) {
 		in_dev = inetdev_init(dev);
-		if (!in_dev)
-			goto out;
+		if (in_dev == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
 	}
 
-	if ((ifa = inet_alloc_ifa()) == NULL)
-		goto out;
+	ifa = inet_alloc_ifa();
+	if (ifa == NULL) {
+		/*
+		 * A potential indev allocation can be left alive, it stays
+		 * assigned to its device and is destroy with it.
+		 */
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	in_dev_hold(in_dev);
+
+	if (tb[IFA_ADDRESS] == NULL)
+		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 
-	if (!rta[IFA_ADDRESS - 1])
-		rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
-	memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
-	memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
-	if (rta[IFA_BROADCAST - 1])
-		memcpy(&ifa->ifa_broadcast,
-		       RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
-	if (rta[IFA_ANYCAST - 1])
-		memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
 	ifa->ifa_flags = ifm->ifa_flags;
 	ifa->ifa_scope = ifm->ifa_scope;
-	in_dev_hold(in_dev);
-	ifa->ifa_dev   = in_dev;
-	if (rta[IFA_LABEL - 1])
-		rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
+	ifa->ifa_dev = in_dev;
+
+	ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
+	ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
+
+	if (tb[IFA_BROADCAST])
+		ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
+
+	if (tb[IFA_ANYCAST])
+		ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
+
+	if (tb[IFA_LABEL])
+		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
-	rc = inet_insert_ifa(ifa);
-out:
-	return rc;
+	return ifa;
+
+errout:
+	return ERR_PTR(err);
+}
+
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct in_ifaddr *ifa;
+
+	ASSERT_RTNL();
+
+	ifa = rtm_to_ifaddr(nlh);
+	if (IS_ERR(ifa))
+		return PTR_ERR(ifa);
+
+	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 }
 
 /*
@@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_bu
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+
 	if (ifa->ifa_address)
-		RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+		NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
+
 	if (ifa->ifa_local)
-		RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+		NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
+
 	if (ifa->ifa_broadcast)
-		RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+		NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
+
 	if (ifa->ifa_anycast)
-		RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+		NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
+
 	if (ifa->ifa_label[0])
-		RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1127,19 +1201,27 @@ done:
 	return skb->len;
 }
 
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+		      u32 pid)
 {
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct sk_buff *skb;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (!skb)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
-	} else {
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
+		goto errout;
 	}
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
 }
 
 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetl
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
 				      .dumpit	= inet_dump_fib,	},
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= inet_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= inet_rtm_delrule,	},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= inet_dump_rules,	},
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
 #endif
 };
 
diff -puN net/ipv4/fib_frontend.c~git-net net/ipv4/fib_frontend.c
--- a/net/ipv4/fib_frontend.c~git-net
+++ a/net/ipv4/fib_frontend.c
@@ -32,10 +32,12 @@
 #include <linux/inet.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -50,48 +52,67 @@
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-#define RT_TABLE_MIN RT_TABLE_MAIN
-
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
 
-#else
+#define FIB_TABLE_HASHSZ 1
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-#define RT_TABLE_MIN 1
+#else
 
-struct fib_table *fib_tables[RT_TABLE_MAX+1];
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-struct fib_table *__fib_new_table(int id)
+struct fib_table *fib_new_table(u32 id)
 {
 	struct fib_table *tb;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	tb = fib_get_table(id);
+	if (tb)
+		return tb;
 	tb = fib_hash_init(id);
 	if (!tb)
 		return NULL;
-	fib_tables[id] = tb;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
 	return tb;
 }
 
+struct fib_table *fib_get_table(u32 id)
+{
+	struct fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+		if (tb->tb_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-
 static void fib_flush(void)
 {
 	int flushed = 0;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_table *tb;
-	int id;
+	struct hlist_node *node;
+	unsigned int h;
 
-	for (id = RT_TABLE_MAX; id>0; id--) {
-		if ((tb = fib_get_table(id))==NULL)
-			continue;
-		flushed += tb->tb_flush(tb);
-	}
-#else /* CONFIG_IP_MULTIPLE_TABLES */
-	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
-	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
-#endif /* CONFIG_IP_MULTIPLE_TABLES */
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+			flushed += tb->tb_flush(tb);
+	}
 
 	if (flushed)
 		rt_cache_flush(-1);
@@ -232,42 +253,190 @@ e_inval:
 
 #ifndef CONFIG_IP_NOSIOCRT
 
+static inline u32 sk_extract_addr(struct sockaddr *addr)
+{
+	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+}
+
+static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
+{
+	struct nlattr *nla;
+
+	nla = (struct nlattr *) ((char *) mx + len);
+	nla->nla_type = type;
+	nla->nla_len = nla_attr_size(4);
+	*(u32 *) nla_data(nla) = value;
+
+	return len + nla_total_size(4);
+}
+
+static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+				 struct fib_config *cfg)
+{
+	u32 addr;
+	int plen;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	if (rt->rt_dst.sa_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	/*
+	 * Check mask for validity:
+	 * a) it must be contiguous.
+	 * b) destination must have all host bits clear.
+	 * c) if application forgot to set correct family (AF_INET),
+	 *    reject request unless it is absolutely clear i.e.
+	 *    both family and mask are zero.
+	 */
+	plen = 32;
+	addr = sk_extract_addr(&rt->rt_dst);
+	if (!(rt->rt_flags & RTF_HOST)) {
+		u32 mask = sk_extract_addr(&rt->rt_genmask);
+
+		if (rt->rt_genmask.sa_family != AF_INET) {
+			if (mask || rt->rt_genmask.sa_family)
+				return -EAFNOSUPPORT;
+		}
+
+		if (bad_mask(mask, addr))
+			return -EINVAL;
+
+		plen = inet_mask_len(mask);
+	}
+
+	cfg->fc_dst_len = plen;
+	cfg->fc_dst = addr;
+
+	if (cmd != SIOCDELRT) {
+		cfg->fc_nlflags = NLM_F_CREATE;
+		cfg->fc_protocol = RTPROT_BOOT;
+	}
+
+	if (rt->rt_metric)
+		cfg->fc_priority = rt->rt_metric - 1;
+
+	if (rt->rt_flags & RTF_REJECT) {
+		cfg->fc_scope = RT_SCOPE_HOST;
+		cfg->fc_type = RTN_UNREACHABLE;
+		return 0;
+	}
+
+	cfg->fc_scope = RT_SCOPE_NOWHERE;
+	cfg->fc_type = RTN_UNICAST;
+
+	if (rt->rt_dev) {
+		char *colon;
+		struct net_device *dev;
+		char devname[IFNAMSIZ];
+
+		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
+			return -EFAULT;
+
+		devname[IFNAMSIZ-1] = 0;
+		colon = strchr(devname, ':');
+		if (colon)
+			*colon = 0;
+		dev = __dev_get_by_name(devname);
+		if (!dev)
+			return -ENODEV;
+		cfg->fc_oif = dev->ifindex;
+		if (colon) {
+			struct in_ifaddr *ifa;
+			struct in_device *in_dev = __in_dev_get_rtnl(dev);
+			if (!in_dev)
+				return -ENODEV;
+			*colon = ':';
+			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+				if (strcmp(ifa->ifa_label, devname) == 0)
+					break;
+			if (ifa == NULL)
+				return -ENODEV;
+			cfg->fc_prefsrc = ifa->ifa_local;
+		}
+	}
+
+	addr = sk_extract_addr(&rt->rt_gateway);
+	if (rt->rt_gateway.sa_family == AF_INET && addr) {
+		cfg->fc_gw = addr;
+		if (rt->rt_flags & RTF_GATEWAY &&
+		    inet_addr_type(addr) == RTN_UNICAST)
+			cfg->fc_scope = RT_SCOPE_UNIVERSE;
+	}
+
+	if (cmd == SIOCDELRT)
+		return 0;
+
+	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+		return -EINVAL;
+
+	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
+		cfg->fc_scope = RT_SCOPE_LINK;
+
+	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
+		struct nlattr *mx;
+		int len = 0;
+
+		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ 		if (mx == NULL)
+			return -ENOMEM;
+
+		if (rt->rt_flags & RTF_MTU)
+			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
+
+		if (rt->rt_flags & RTF_WINDOW)
+			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
+
+		if (rt->rt_flags & RTF_IRTT)
+			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
+
+		cfg->fc_mx = mx;
+		cfg->fc_mx_len = len;
+	}
+
+	return 0;
+}
+
 /*
  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
  */
  
 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib_config cfg;
+	struct rtentry rt;
 	int err;
-	struct kern_rta rta;
-	struct rtentry  r;
-	struct {
-		struct nlmsghdr nlh;
-		struct rtmsg	rtm;
-	} req;
 
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+
+		if (copy_from_user(&rt, arg, sizeof(rt)))
 			return -EFAULT;
+
 		rtnl_lock();
-		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+		err = rtentry_to_fib_config(cmd, &rt, &cfg);
 		if (err == 0) {
+			struct fib_table *tb;
+
 			if (cmd == SIOCDELRT) {
-				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
-				err = -ESRCH;
+				tb = fib_get_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_delete(tb, &cfg);
+				else
+					err = -ESRCH;
 			} else {
-				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
-				err = -ENOBUFS;
+				tb = fib_new_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_insert(tb, &cfg);
+				else
+					err = -ENOBUFS;
 			}
-			kfree(rta.rta_mx);
+
+			/* allocated by rtentry_to_fib_config() */
+			kfree(cfg.fc_mx);
 		}
 		rtnl_unlock();
 		return err;
@@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *
 
 #endif
 
-static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
-{
-	int i;
+struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+	[RTA_DST]		= { .type = NLA_U32 },
+	[RTA_SRC]		= { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
+	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_GATEWAY]		= { .type = NLA_U32 },
+	[RTA_PRIORITY]		= { .type = NLA_U32 },
+	[RTA_PREFSRC]		= { .type = NLA_U32 },
+	[RTA_METRICS]		= { .type = NLA_NESTED },
+	[RTA_MULTIPATH]		= { .minlen = sizeof(struct rtnexthop) },
+	[RTA_PROTOINFO]		= { .type = NLA_U32 },
+	[RTA_FLOW]		= { .type = NLA_U32 },
+	[RTA_MP_ALGO]		= { .type = NLA_U32 },
+};
 
-	for (i=1; i<=RTA_MAX; i++, rta++) {
-		struct rtattr *attr = *rta;
-		if (attr) {
-			if (RTA_PAYLOAD(attr) < 4)
-				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
-				*rta = (struct rtattr*)RTA_DATA(attr);
+static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct fib_config *cfg)
+{
+	struct nlattr *attr;
+	int err, remaining;
+	struct rtmsg *rtm;
+
+	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	rtm = nlmsg_data(nlh);
+	cfg->fc_family = rtm->rtm_family;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_tos = rtm->rtm_tos;
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_protocol = rtm->rtm_protocol;
+	cfg->fc_scope = rtm->rtm_scope;
+	cfg->fc_type = rtm->rtm_type;
+	cfg->fc_flags = rtm->rtm_flags;
+	cfg->fc_nlflags = nlh->nlmsg_flags;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.nlh = nlh;
+
+	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
+		switch (attr->nla_type) {
+		case RTA_DST:
+			cfg->fc_dst = nla_get_u32(attr);
+			break;
+		case RTA_SRC:
+			cfg->fc_src = nla_get_u32(attr);
+			break;
+		case RTA_OIF:
+			cfg->fc_oif = nla_get_u32(attr);
+			break;
+		case RTA_GATEWAY:
+			cfg->fc_gw = nla_get_u32(attr);
+			break;
+		case RTA_PRIORITY:
+			cfg->fc_priority = nla_get_u32(attr);
+			break;
+		case RTA_PREFSRC:
+			cfg->fc_prefsrc = nla_get_u32(attr);
+			break;
+		case RTA_METRICS:
+			cfg->fc_mx = nla_data(attr);
+			cfg->fc_mx_len = nla_len(attr);
+			break;
+		case RTA_MULTIPATH:
+			cfg->fc_mp = nla_data(attr);
+			cfg->fc_mp_len = nla_len(attr);
+			break;
+		case RTA_FLOW:
+			cfg->fc_flow = nla_get_u32(attr);
+			break;
+		case RTA_MP_ALGO:
+			cfg->fc_mp_alg = nla_get_u32(attr);
+			break;
+		case RTA_TABLE:
+			cfg->fc_table = nla_get_u32(attr);
+			break;
 		}
 	}
+
 	return 0;
+errout:
+	return err;
 }
 
 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
+
+	tb = fib_get_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ESRCH;
+		goto errout;
+	}
 
-	tb = fib_get_table(r->rtm_table);
-	if (tb)
-		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ESRCH;
+	err = tb->tb_delete(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
+
+	tb = fib_new_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
-	tb = fib_new_table(r->rtm_table);
-	if (tb)
-		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ENOBUFS;
+	err = tb->tb_insert(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int t;
-	int s_t;
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
 	struct fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
 
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
 
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_TABLE_MIN;
+	s_h = cb->args[0];
+	s_e = cb->args[1];
 
-	for (t=s_t; t<=RT_TABLE_MAX; t++) {
-		if (t < s_t) continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-		if ((tb = fib_get_table(t))==NULL)
-			continue;
-		if (tb->tb_dump(tb, skb, cb) < 0) 
-			break;
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->tb_dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
 	}
-
-	cb->args[0] = t;
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
 
 	return skb->len;
 }
@@ -366,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, s
    only when netlink is already locked.
  */
 
-static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, u32 dst, int dst_len,
+		      struct in_ifaddr *ifa)
 {
-	struct fib_table * tb;
-	struct {
-		struct nlmsghdr	nlh;
-		struct rtmsg	rtm;
-	} req;
-	struct kern_rta rta;
-
-	memset(&req.rtm, 0, sizeof(req.rtm));
-	memset(&rta, 0, sizeof(rta));
+	struct fib_table *tb;
+	struct fib_config cfg = {
+		.fc_protocol = RTPROT_KERNEL,
+		.fc_type = type,
+		.fc_dst = dst,
+		.fc_dst_len = dst_len,
+		.fc_prefsrc = ifa->ifa_local,
+		.fc_oif = ifa->ifa_dev->dev->ifindex,
+		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+	};
 
 	if (type == RTN_UNICAST)
 		tb = fib_new_table(RT_TABLE_MAIN);
@@ -386,26 +649,17 @@ static void fib_magic(int cmd, int type,
 	if (tb == NULL)
 		return;
 
-	req.nlh.nlmsg_len = sizeof(req);
-	req.nlh.nlmsg_type = cmd;
-	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
-	req.nlh.nlmsg_pid = 0;
-	req.nlh.nlmsg_seq = 0;
-
-	req.rtm.rtm_dst_len = dst_len;
-	req.rtm.rtm_table = tb->tb_id;
-	req.rtm.rtm_protocol = RTPROT_KERNEL;
-	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
-	req.rtm.rtm_type = type;
-
-	rta.rta_dst = &dst;
-	rta.rta_prefsrc = &ifa->ifa_local;
-	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+	cfg.fc_table = tb->tb_id;
+
+	if (type != RTN_LOCAL)
+		cfg.fc_scope = RT_SCOPE_LINK;
+	else
+		cfg.fc_scope = RT_SCOPE_HOST;
 
 	if (cmd == RTM_NEWROUTE)
-		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_insert(tb, &cfg);
 	else
-		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_delete(tb, &cfg);
 }
 
 void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -652,11 +906,17 @@ static struct notifier_block fib_netdev_
 
 void __init ip_fib_init(void)
 {
+	unsigned int i;
+
+	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+		INIT_HLIST_HEAD(&fib_table_hash[i]);
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
 #else
-	fib_rules_init();
+	fib4_rules_init();
 #endif
 
 	register_netdevice_notifier(&fib_netdev_notifier);
diff -puN net/ipv4/fib_hash.c~git-net net/ipv4/fib_hash.c
--- a/net/ipv4/fib_hash.c~git-net
+++ a/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(st
 	return NULL;
 }
 
-static int
-fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fib_node *new_f, *f;
 	struct fib_alias *fa, *new_fa;
 	struct fn_zone *fz;
 	struct fib_info *fi;
-	int z = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	u8 tos = cfg->fc_tos;
 	u32 key;
 	int err;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	fz = table->fn_zones[z];
-	if (!fz && !(fz = fn_new_zone(table, z)))
+
+	fz = table->fn_zones[cfg->fc_dst_len];
+	if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
 		return -ENOBUFS;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
-	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
-		return err;
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi))
+		return PTR_ERR(fi);
 
 	if (fz->fz_nent > (fz->fz_divisor<<1) &&
 	    fz->fz_divisor < FZ_MAX_DIVISOR &&
-	    (z==32 || (1<<z) > fz->fz_divisor))
+	    (cfg->fc_dst_len == 32 ||
+	     (1 << cfg->fc_dst_len) > fz->fz_divisor))
 		fn_rehash_zone(fz);
 
 	f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, str
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (n->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (n->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
 			write_lock_bh(&fib_hash_lock);
 			fi_drop = fa->fa_info;
 			fa->fa_info = fi;
-			fa->fa_type = type;
-			fa->fa_scope = r->rtm_scope;
+			fa->fa_type = cfg->fc_type;
+			fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			fa->fa_state &= ~FA_S_ACCESSED;
 			fib_hash_genid++;
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, str
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi)
 				goto out;
 		}
-		if (!(n->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 
 	err = -ENOENT;
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, str
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 
 	/*
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, str
 		fz->fz_nent++;
 	rt_cache_flush(-1);
 
-	rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
+		  &cfg->fc_nlinfo);
 	return 0;
 
 out_free_new_fa:
@@ -537,30 +535,25 @@ out:
 }
 
 
-static int
-fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 	struct fib_node *f;
 	struct fib_alias *fa, *fa_to_delete;
-	int z = r->rtm_dst_len;
 	struct fn_zone *fz;
 	u32 key;
-	u8 tos = r->rtm_tos;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	if ((fz  = table->fn_zones[z]) == NULL)
+
+	if ((fz  = table->fn_zones[cfg->fc_dst_len]) == NULL)
 		return -ESRCH;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
 	f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, str
 	if (!f)
 		fa = NULL;
 	else
-		fa = fib_find_alias(&f->fn_alias, tos, 0);
+		fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
 	if (!fa)
 		return -ESRCH;
 
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, str
 	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
-		if (fa->fa_tos != tos)
+		if (fa->fa_tos != cfg->fc_tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, n, rta, fi) == 0) {
+		if ((!cfg->fc_type ||
+		     fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, str
 		int kill_fn;
 
 		fa = fa_to_delete;
-		rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+		rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
+			  tb->tb_id, &cfg->fc_nlinfo);
 
 		kill_fn = 0;
 		write_lock_bh(&fib_hash_lock);
@@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb,
 	struct fib_node *f;
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 	hlist_for_each_entry(f, node, head, fn_hash) {
 		struct fib_alias *fa;
@@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb,
 					  tb->tb_id,
 					  fa->fa_type,
 					  fa->fa_scope,
-					  &f->fn_key,
+					  f->fn_key,
 					  fz->fz_order,
 					  fa->fa_tos,
 					  fa->fa_info,
 					  NLM_F_MULTI) < 0) {
-				cb->args[3] = i;
+				cb->args[4] = i;
 				return -1;
 			}
 		next:
 			i++;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, s
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for (h=0; h < fz->fz_divisor; h++) {
 		if (h < s_h) continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (fz->fz_hash == NULL ||
 		    hlist_empty(&fz->fz_hash[h]))
 			continue;
 		if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table
 	struct fn_zone *fz;
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&fib_hash_lock);
 	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
 		if (m < s_m) continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&fib_hash_lock);
 			return -1;
 		}
 	}
 	read_unlock(&fib_hash_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 }
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
diff -puN net/ipv4/fib_lookup.h~git-net net/ipv4/fib_lookup.h
--- a/net/ipv4/fib_lookup.h~git-net
+++ a/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct lis
 			      struct fib_result *res, __u32 zone, __u32 mask,
 				int prefixlen);
 extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(const struct rtmsg *r,
-					struct kern_rta *rta,
-					const struct nlmsghdr *,
-					int *err);
-extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
-			struct kern_rta *rta, struct fib_info *fi);
+extern struct fib_info *fib_create_info(struct fib_config *cfg);
+extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u8 tb_id, u8 type, u8 scope, void *dst,
+			 u32 tb_id, u8 type, u8 scope, u32 dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-		      int z, int tb_id,
-		      struct nlmsghdr *n, struct netlink_skb_parms *req);
+		      int dst_len, u32 tb_id, struct nl_info *info);
 extern struct fib_alias *fib_find_alias(struct list_head *fah,
 					u8 tos, u32 prio);
 extern int fib_detect_death(struct fib_info *fi, int order,
diff -puN net/ipv4/fib_rules.c~git-net net/ipv4/fib_rules.c
--- a/net/ipv4/fib_rules.c~git-net
+++ a/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
  *
  *		IPv4 Forwarding Information Base: policy rules.
  *
- * Version:	$Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * 		Thomas Graf <tgraf@suug.ch>
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -19,463 +18,333 @@
  *		Marc Boucher	:	routing by fwmark
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/inetdevice.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-
 #include <net/ip.h>
-#include <net/protocol.h>
 #include <net/route.h>
 #include <net/tcp.h>
-#include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_rules.h>
 
-#define FRprintk(a...)
+static struct fib_rules_ops fib4_rules_ops;
 
-struct fib_rule
+struct fib4_rule
 {
-	struct hlist_node hlist;
-	atomic_t	r_clntref;
-	u32		r_preference;
-	unsigned char	r_table;
-	unsigned char	r_action;
-	unsigned char	r_dst_len;
-	unsigned char	r_src_len;
-	u32		r_src;
-	u32		r_srcmask;
-	u32		r_dst;
-	u32		r_dstmask;
-	u32		r_srcmap;
-	u8		r_flags;
-	u8		r_tos;
+	struct fib_rule		common;
+	u8			dst_len;
+	u8			src_len;
+	u8			tos;
+	u32			src;
+	u32			srcmask;
+	u32			dst;
+	u32			dstmask;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	u32		r_fwmark;
+	u32			fwmark;
 #endif
-	int		r_ifindex;
 #ifdef CONFIG_NET_CLS_ROUTE
-	__u32		r_tclassid;
+	u32			tclassid;
 #endif
-	char		r_ifname[IFNAMSIZ];
-	int		r_dead;
-	struct		rcu_head rcu;
 };
 
-static struct fib_rule default_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFF,
-	.r_table =	RT_TABLE_DEFAULT,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule default_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFF,
+		.table =	RT_TABLE_DEFAULT,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule main_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFE,
-	.r_table =	RT_TABLE_MAIN,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.table =	RT_TABLE_MAIN,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule local_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_table =	RT_TABLE_LOCAL,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.table =	RT_TABLE_LOCAL,
+		.action =	FR_ACT_TO_TBL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
 };
 
-static struct hlist_head fib_rules;
+static LIST_HEAD(fib4_rules);
 
-/* writer func called from netlink -- rtnl_sem hold*/
-
-static void rtmsg_rule(int, struct fib_rule *);
-
-int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+#ifdef CONFIG_NET_CLS_ROUTE
+u32 fib_rules_tclass(struct fib_result *res)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
-		    rtm->rtm_src_len == r->r_src_len &&
-		    rtm->rtm_dst_len == r->r_dst_len &&
-		    (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
-		    rtm->rtm_tos == r->r_tos &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
-		    (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-		    (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-			err = -EPERM;
-			if (r == &local_rule)
-				break;
-
-			hlist_del_rcu(&r->hlist);
-			r->r_dead = 1;
-			rtmsg_rule(RTM_DELRULE, r);
-			fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
-	return err;
+	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
 }
+#endif
 
-/* Allocate new unique table id */
-
-static struct fib_table *fib_empty_table(void)
+int fib_lookup(struct flowi *flp, struct fib_result *res)
 {
-	int id;
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
 
-	for (id = 1; id <= RT_TABLE_MAX; id++)
-		if (fib_tables[id] == NULL)
-			return __fib_new_table(id);
-	return NULL;
-}
+	err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
-	struct fib_rule *r = container_of(head, struct fib_rule, rcu);
-	kfree(r);
+	return err;
 }
 
-void fib_rule_put(struct fib_rule *r)
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
 {
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, fib_rule_put_rcu);
-		else
-			printk("Freeing alive rule %p\n", r);
-	}
-}
+	int err = -EAGAIN;
+	struct fib_table *tbl;
 
-/* writer func called from netlink -- rtnl_sem hold*/
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
 
-int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
-	    (rtm->rtm_tos & ~IPTOS_TOS_MASK))
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct fib_table *table;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((table = fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = table->tb_id;
-		}
-	}
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
-	new_r->r_tos = rtm->rtm_tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
-#endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = __dev_get_by_name(new_r->r_ifname);
-		if (dev)
-			new_r->r_ifindex = dev->ifindex;
-	}
-#ifdef CONFIG_NET_CLS_ROUTE
-	if (rta[RTA_FLOW-1])
-		memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
-#endif
-	r = container_of(fib_rules.first, struct fib_rule, hlist);
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
 
-	if (!new_r->r_preference) {
-		if (r && r->hlist.next != NULL) {
-			r = container_of(r->hlist.next, struct fib_rule, hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
-		}
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
 	}
 
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
+	if ((tbl = fib_get_table(rule->table)) == NULL)
+		goto errout;
 
-	if (last)
-		hlist_add_after_rcu(&last->hlist, &new_r->hlist);
-	else
-		hlist_add_before_rcu(&new_r->hlist, &r->hlist);
-
-	rtmsg_rule(RTM_NEWRULE, new_r);
-	return 0;
+	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
+
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
 {
-	if (res->r)
-		return res->r->r_tclassid;
-	return 0;
+	if (res->r && res->r->action == FR_ACT_TO_TBL &&
+	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+		struct fib_table *tb;
+		if ((tb = fib_get_table(res->r->table)) != NULL)
+			tb->tb_select_default(tb, flp, res);
+	}
 }
+
+static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib4_rule *r = (struct fib4_rule *) rule;
+	u32 daddr = fl->fl4_dst;
+	u32 saddr = fl->fl4_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
+
+	if (r->tos && (r->tos != fl->fl4_tos))
+		return 0;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (r->fwmark && (r->fwmark != fl->fl4_fwmark))
+		return 0;
 #endif
 
-/* callers should hold rtnl semaphore */
+	return 1;
+}
 
-static void fib_rules_detach(struct net_device *dev)
+static struct fib_table *fib_empty_table(void)
 {
-	struct hlist_node *node;
-	struct fib_rule *r;
+	u32 id;
 
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-
-	}
+	for (id = 1; id <= RT_TABLE_MAX; id++)
+		if (fib_get_table(id) == NULL)
+			return fib_new_table(id);
+	return NULL;
 }
 
-/* callers should hold rtnl semaphore */
+static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U32 },
+	[FRA_DST]	= { .type = NLA_U32 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FLOW]	= { .type = NLA_U32 },
+	[FRA_TABLE]	= { .type = NLA_U32 },
+};
 
-static void fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct fib_rule *r;
+static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+	if (frh->src_len > 32 || frh->dst_len > 32 ||
+	    (frh->tos & ~IPTOS_TOS_MASK))
+		goto errout;
+
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct fib_table *table;
+
+			table = fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
 
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
+			rule->table = table->tb_id;
+		}
 	}
-}
 
-int fib_lookup(const struct flowi *flp, struct fib_result *res)
-{
-	int err;
-	struct fib_rule *r, *policy;
-	struct fib_table *tb;
-	struct hlist_node *node;
-
-	u32 daddr = flp->fl4_dst;
-	u32 saddr = flp->fl4_src;
-
-FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
-	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
-
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
-		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
+	if (tb[FRA_SRC])
+		rule4->src = nla_get_u32(tb[FRA_SRC]);
+
+	if (tb[FRA_DST])
+		rule4->dst = nla_get_u32(tb[FRA_DST]);
+
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+	if (tb[FRA_FWMARK])
+		rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
 #endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
 
-FRprintk("tb %d r %d ", r->r_table, r->r_action);
-		switch (r->r_action) {
-		case RTN_UNICAST:
-			policy = r;
-			break;
-		case RTN_UNREACHABLE:
-			rcu_read_unlock();
-			return -ENETUNREACH;
-		default:
-		case RTN_BLACKHOLE:
-			rcu_read_unlock();
-			return -EINVAL;
-		case RTN_PROHIBIT:
-			rcu_read_unlock();
-			return -EACCES;
-		}
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (tb[FRA_FLOW])
+		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
+#endif
 
-		if ((tb = fib_get_table(r->r_table)) == NULL)
-			continue;
-		err = tb->tb_lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
-FRprintk("FAILURE\n");
-	rcu_read_unlock();
-	return -ENETUNREACH;
-}
+	rule4->src_len = frh->src_len;
+	rule4->srcmask = inet_make_mask(rule4->src_len);
+	rule4->dst_len = frh->dst_len;
+	rule4->dstmask = inet_make_mask(rule4->dst_len);
+	rule4->tos = frh->tos;
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
-	if (res->r && res->r->r_action == RTN_UNICAST &&
-	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
-		struct fib_table *tb;
-		if ((tb = fib_get_table(res->r->r_table)) != NULL)
-			tb->tb_select_default(tb, flp, res);
-	}
+	err = 0;
+errout:
+	return err;
 }
 
-static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
 {
-	struct net_device *dev = ptr;
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (event == NETDEV_UNREGISTER)
-		fib_rules_detach(dev);
-	else if (event == NETDEV_REGISTER)
-		fib_rules_attach(dev);
-	return NOTIFY_DONE;
-}
+	if (frh->src_len && (rule4->src_len != frh->src_len))
+		return 0;
 
+	if (frh->dst_len && (rule4->dst_len != frh->dst_len))
+		return 0;
 
-static struct notifier_block fib_rules_notifier = {
-	.notifier_call =fib_rules_event,
-};
+	if (frh->tos && (rule4->tos != frh->tos))
+		return 0;
 
-static __inline__ int inet_fill_rule(struct sk_buff *skb,
-				     struct fib_rule *r,
-				     u32 pid, u32 seq, int event,
-				     unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_INET;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = r->r_tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+	if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
 #endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (r->r_tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
+		return 0;
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
+	if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
+		return 0;
 
-/* callers should hold rtnl semaphore */
+	if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
+		return 0;
 
-static void rtmsg_rule(int event, struct fib_rule *r)
+	return 1;
+}
+
+static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
 {
-	int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (!skb)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
-	else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
-		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
-	} else {
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
-	}
+	frh->family = AF_INET;
+	frh->dst_len = rule4->dst_len;
+	frh->src_len = rule4->src_len;
+	frh->tos = rule4->tos;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (rule4->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
+#endif
+
+	if (rule4->dst_len)
+		NLA_PUT_U32(skb, FRA_DST, rule4->dst);
+
+	if (rule4->src_len)
+		NLA_PUT_U32(skb, FRA_SRC, rule4->src);
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (rule4->tclassid)
+		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
+#endif
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET);
 }
 
-int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static u32 fib4_rule_default_pref(void)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
-				   cb->nlh->nlmsg_seq,
-				   RTM_NEWRULE, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&fib4_rules)) {
+		pos = fib4_rules.next;
+		if (pos->next != &fib4_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
 
-	return skb->len;
+	return 0;
 }
 
-void __init fib_rules_init(void)
+static struct fib_rules_ops fib4_rules_ops = {
+	.family		= AF_INET,
+	.rule_size	= sizeof(struct fib4_rule),
+	.action		= fib4_rule_action,
+	.match		= fib4_rule_match,
+	.configure	= fib4_rule_configure,
+	.compare	= fib4_rule_compare,
+	.fill		= fib4_rule_fill,
+	.default_pref	= fib4_rule_default_pref,
+	.nlgroup	= RTNLGRP_IPV4_RULE,
+	.policy		= fib4_rule_policy,
+	.rules_list	= &fib4_rules,
+	.owner		= THIS_MODULE,
+};
+
+void __init fib4_rules_init(void)
 {
-	INIT_HLIST_HEAD(&fib_rules);
-	hlist_add_head(&local_rule.hlist, &fib_rules);
-	hlist_add_after(&local_rule.hlist, &main_rule.hlist);
-	hlist_add_after(&main_rule.hlist, &default_rule.hlist);
-	register_netdevice_notifier(&fib_rules_notifier);
+	list_add_tail(&local_rule.common.list, &fib4_rules);
+	list_add_tail(&main_rule.common.list, &fib4_rules);
+	list_add_tail(&default_rule.common.list, &fib4_rules);
+
+	fib_rules_register(&fib4_rules_ops);
 }
diff -puN net/ipv4/fib_semantics.c~git-net net/ipv4/fib_semantics.c
--- a/net/ipv4/fib_semantics.c~git-net
+++ a/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 
 #include <net/arp.h>
@@ -44,12 +43,14 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/ip_mp_alg.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include "fib_lookup.h"
 
 #define FSprintk(a...)
 
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_hash_size;
@@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *f
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +255,7 @@ int ip_fib_check_default(u32 gw, struct 
 	struct fib_nh *nh;
 	unsigned int hash;
 
-	read_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
@@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct 
 		if (nh->nh_dev == dev &&
 		    nh->nh_gw == gw &&
 		    !(nh->nh_flags&RTNH_F_DEAD)) {
-			read_unlock(&fib_info_lock);
+			spin_unlock(&fib_info_lock);
 			return 0;
 		}
 	}
 
-	read_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 
 	return -1;
 }
 
 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-	       int z, int tb_id,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+	       int dst_len, u32 tb_id, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : n->nlmsg_pid;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
-
-	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-			  fa->fa_type, fa->fa_scope, &key, z,
-			  fa->fa_tos,
-			  fa->fa_info, 0) < 0) {
+	int payload = sizeof(struct rtmsg) + 256;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+			    fa->fa_type, fa->fa_scope, key, dst_len,
+			    fa->fa_tos, fa->fa_info, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+			  info->nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
-	while (RTA_OK(attr,attrlen)) {
-		if (attr->rta_type == type)
-			return *(u32*)RTA_DATA(attr);
-		attr = RTA_NEXT(attr, attrlen);
-	}
-	return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 {
 	int nhs = 0;
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
 
-	while (nhlen >= (int)sizeof(struct rtnexthop)) {
-		if ((nhlen -= nhp->rtnh_len) < 0)
-			return 0;
+	while (rtnh_ok(rtnh, remaining)) {
 		nhs++;
-		nhp = RTNH_NEXT(nhp);
-	};
-	return nhs;
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	/* leftover implies invalid nexthop configuration, discard it */
+	return remaining > 0 ? 0 : nhs;
 }
 
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg)
 {
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
-
 	change_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		int attrlen;
+
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
-		nh->nh_oif = nhp->rtnh_ifindex;
-		nh->nh_weight = nhp->rtnh_hops + 1;
-		if (attrlen) {
-			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		nh->nh_oif = rtnh->rtnh_ifindex;
+		nh->nh_weight = rtnh->rtnh_hops + 1;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			nh->nh_gw = nla ? nla_get_u32(nla) : 0;
 #ifdef CONFIG_NET_CLS_ROUTE
-			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
+
 	return 0;
 }
 
 #endif
 
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
-		 struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	struct rtnexthop *nhp;
-	int nhlen;
+	struct rtnexthop *rtnh;
+	int remaining;
 #endif
 
-	if (rta->rta_priority &&
-	    *rta->rta_priority != fi->fib_priority)
+	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (rta->rta_oif || rta->rta_gw) {
-		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
-		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+	if (cfg->fc_oif || cfg->fc_gw) {
+		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
 			return 0;
 		return 1;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp == NULL)
+	if (cfg->fc_mp == NULL)
 		return 0;
-	nhp = RTA_DATA(rta->rta_mp);
-	nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+	rtnh = cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
 	
 	for_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		u32 gw;
+		int attrlen;
 
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
 			return 1;
-		if (attrlen) {
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
-			if (gw && gw != nh->nh_gw)
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen < 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla && nla_get_u32(nla) != nh->nh_gw)
 				return 1;
 #ifdef CONFIG_NET_CLS_ROUTE
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
-			if (gw && gw != nh->nh_tclassid)
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 #endif
 	return 0;
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct
 						|-> {local prefix} (terminal node)
  */
 
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+			struct fib_nh *nh)
 {
 	int err;
 
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtm
 		if (nh->nh_flags&RTNH_F_ONLINK) {
 			struct net_device *dev;
 
-			if (r->rtm_scope >= RT_SCOPE_LINK)
+			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtm
 			return 0;
 		}
 		{
-			struct flowi fl = { .nl_u = { .ip4_u =
-						      { .daddr = nh->nh_gw,
-							.scope = r->rtm_scope + 1 } },
-					    .oif = nh->nh_oif };
+			struct flowi fl = {
+				.nl_u = {
+					.ip4_u = {
+						.daddr = nh->nh_gw,
+						.scope = cfg->fc_scope + 1,
+					},
+				},
+				.oif = nh->nh_oif,
+			};
 
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_h
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_h
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
 	fib_hash_free(old_laddrhash, bytes);
 }
 
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
-		const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int nhs = 1;
-#else
-	const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	u32 mp_alg = IP_MP_ALG_NONE;
-#endif
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[r->rtm_type].scope > r->rtm_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp) {
-		nhs = fib_count_nexthops(rta->rta_mp);
+	if (cfg->fc_mp) {
+		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
 		if (nhs == 0)
 			goto err_inval;
 	}
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rta->rta_mp_alg) {
-		mp_alg = *rta->rta_mp_alg;
-
-		if (mp_alg < IP_MP_ALG_NONE ||
-		    mp_alg > IP_MP_ALG_MAX)
+	if (cfg->fc_mp_alg) {
+		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
 			goto err_inval;
 	}
 #endif
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, s
 		goto failure;
 	fib_info_cnt++;
 
-	fi->fib_protocol = r->rtm_protocol;
+	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_flags = cfg->fc_flags;
+	fi->fib_priority = cfg->fc_priority;
+	fi->fib_prefsrc = cfg->fc_prefsrc;
 
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
 		nh->nh_parent = fi;
 	} endfor_nexthops(fi)
 
-	fi->fib_flags = r->rtm_flags;
-	if (rta->rta_priority)
-		fi->fib_priority = *rta->rta_priority;
-	if (rta->rta_mx) {
-		int attrlen = RTA_PAYLOAD(rta->rta_mx);
-		struct rtattr *attr = RTA_DATA(rta->rta_mx);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX)
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
+
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX)
 					goto err_inval;
-				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+				fi->fib_metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
-	if (rta->rta_prefsrc)
-		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
 
-	if (rta->rta_mp) {
+	if (cfg->fc_mp) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+		if (err != 0)
 			goto failure;
-		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
 			goto err_inval;
-		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
 #else
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, s
 #endif
 	} else {
 		struct fib_nh *nh = fi->fib_nh;
-		if (rta->rta_oif)
-			nh->nh_oif = *rta->rta_oif;
-		if (rta->rta_gw)
-			memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+		nh->nh_oif = cfg->fc_oif;
+		nh->nh_gw = cfg->fc_gw;
+		nh->nh_flags = cfg->fc_flags;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow)
-			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+		nh->nh_tclassid = cfg->fc_flow;
 #endif
-		nh->nh_flags = r->rtm_flags;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
 #endif
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	fi->fib_mp_alg = mp_alg;
+	fi->fib_mp_alg = cfg->fc_mp_alg;
 #endif
 
-	if (fib_props[r->rtm_type].error) {
-		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+	if (fib_props[cfg->fc_type].error) {
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
 		goto link_it;
 	}
 
-	if (r->rtm_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST)
 		goto err_inval;
 
-	if (r->rtm_scope == RT_SCOPE_HOST) {
+	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, s
 			goto failure;
 	} else {
 		change_nexthops(fi) {
-			if ((err = fib_check_nh(r, fi, nh)) != 0)
+			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
 				goto failure;
 		} endfor_nexthops(fi)
 	}
 
 	if (fi->fib_prefsrc) {
-		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
-		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+		    fi->fib_prefsrc != cfg->fc_dst)
 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
@@ -820,7 +811,7 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,19 +830,19 @@ link_it:
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 	return fi;
 
 err_inval:
 	err = -EINVAL;
 
 failure:
-        *errp = err;
         if (fi) {
 		fi->fib_dead = 1;
 		free_fib_info(fi);
 	}
-	return NULL;
+
+	return ERR_PTR(err);
 }
 
 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
@@ -937,224 +928,89 @@ u32 __fib_res_prefsrc(struct fib_result 
 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 }
 
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+		  u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
+		  struct fib_info *fi, unsigned int flags)
 {
+	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
 	rtm->rtm_table = tb_id;
+	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
-	if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, dst);
 	rtm->rtm_protocol = fi->fib_protocol;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT_U32(skb, RTA_DST, dst);
+
 	if (fi->fib_priority)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (fi->fib_prefsrc)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+		NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
 	if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw)
-			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+			NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
 		if (fi->fib_nh->nh_oif)
-			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
 #ifdef CONFIG_NET_CLS_ROUTE
 		if (fi->fib_nh[0].nh_tclassid)
-			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
-		struct rtnexthop *nhp;
-		struct rtattr *mp_head;
-		if (skb_tailroom(skb) <= RTA_SPACE(0))
-			goto rtattr_failure;
-		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+		struct rtnexthop *rtnh;
+		struct nlattr *mp;
+
+		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		if (mp == NULL)
+			goto nla_put_failure;
 
 		for_nexthops(fi) {
-			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
-				goto rtattr_failure;
-			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
-			nhp->rtnh_flags = nh->nh_flags & 0xFF;
-			nhp->rtnh_hops = nh->nh_weight-1;
-			nhp->rtnh_ifindex = nh->nh_oif;
+			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+			if (rtnh == NULL)
+				goto nla_put_failure;
+
+			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+			rtnh->rtnh_hops = nh->nh_weight - 1;
+			rtnh->rtnh_ifindex = nh->nh_oif;
+
 			if (nh->nh_gw)
-				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+				NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
 #ifdef CONFIG_NET_CLS_ROUTE
 			if (nh->nh_tclassid)
-				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
-			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+			/* length of rtnetlink header + attributes */
+			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
-		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
-	}
-#endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifndef CONFIG_IP_NOSIOCRT
-
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-		    struct kern_rta *rta, struct rtentry *r)
-{
-	int    plen;
-	u32    *ptr;
-
-	memset(rtm, 0, sizeof(*rtm));
-	memset(rta, 0, sizeof(*rta));
-
-	if (r->rt_dst.sa_family != AF_INET)
-		return -EAFNOSUPPORT;
-
-	/* Check mask for validity:
-	   a) it must be contiguous.
-	   b) destination must have all host bits clear.
-	   c) if application forgot to set correct family (AF_INET),
-	      reject request unless it is absolutely clear i.e.
-	      both family and mask are zero.
-	 */
-	plen = 32;
-	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
-	if (!(r->rt_flags&RTF_HOST)) {
-		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
-		if (r->rt_genmask.sa_family != AF_INET) {
-			if (mask || r->rt_genmask.sa_family)
-				return -EAFNOSUPPORT;
-		}
-		if (bad_mask(mask, *ptr))
-			return -EINVAL;
-		plen = inet_mask_len(mask);
-	}
-
-	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
-	nl->nlmsg_seq = 0;
-	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
-	if (cmd == SIOCDELRT) {
-		nl->nlmsg_type = RTM_DELROUTE;
-		nl->nlmsg_flags = 0;
-	} else {
-		nl->nlmsg_type = RTM_NEWROUTE;
-		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
-		rtm->rtm_protocol = RTPROT_BOOT;
-	}
-
-	rtm->rtm_dst_len = plen;
-	rta->rta_dst = ptr;
-
-	if (r->rt_metric) {
-		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
-		rta->rta_priority = (u32*)&r->rt_pad3;
-	}
-	if (r->rt_flags&RTF_REJECT) {
-		rtm->rtm_scope = RT_SCOPE_HOST;
-		rtm->rtm_type = RTN_UNREACHABLE;
-		return 0;
-	}
-	rtm->rtm_scope = RT_SCOPE_NOWHERE;
-	rtm->rtm_type = RTN_UNICAST;
 
-	if (r->rt_dev) {
-		char *colon;
-		struct net_device *dev;
-		char   devname[IFNAMSIZ];
-
-		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
-			return -EFAULT;
-		devname[IFNAMSIZ-1] = 0;
-		colon = strchr(devname, ':');
-		if (colon)
-			*colon = 0;
-		dev = __dev_get_by_name(devname);
-		if (!dev)
-			return -ENODEV;
-		rta->rta_oif = &dev->ifindex;
-		if (colon) {
-			struct in_ifaddr *ifa;
-			struct in_device *in_dev = __in_dev_get_rtnl(dev);
-			if (!in_dev)
-				return -ENODEV;
-			*colon = ':';
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
-				if (strcmp(ifa->ifa_label, devname) == 0)
-					break;
-			if (ifa == NULL)
-				return -ENODEV;
-			rta->rta_prefsrc = &ifa->ifa_local;
-		}
-	}
-
-	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
-	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
-		rta->rta_gw = ptr;
-		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
-			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+		nla_nest_end(skb, mp);
 	}
+#endif
+	return nlmsg_end(skb, nlh);
 
-	if (cmd == SIOCDELRT)
-		return 0;
-
-	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
-		return -EINVAL;
-
-	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
-		rtm->rtm_scope = RT_SCOPE_LINK;
-
-	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
-		struct rtattr *rec;
-		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
-		if (mx == NULL)
-			return -ENOMEM;
-		rta->rta_mx = mx;
-		mx->rta_type = RTA_METRICS;
-		mx->rta_len  = RTA_LENGTH(0);
-		if (r->rt_flags&RTF_MTU) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_ADVMSS;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
-		}
-		if (r->rt_flags&RTF_WINDOW) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_WINDOW;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_window;
-		}
-		if (r->rt_flags&RTF_IRTT) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_RTT;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
-		}
-	}
-	return 0;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-#endif
-
 /*
    Update FIB if:
    - local address disappeared -> we must delete all the entries
diff -puN net/ipv4/fib_trie.c~git-net net/ipv4/fib_trie.c
--- a/net/ipv4/fib_trie.c~git-net
+++ a/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@ err:
 	return fa_head;
 }
 
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
 	struct list_head *fa_head = NULL;
 	struct fib_info *fi;
-	int plen = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	u32 key, mask;
 	int err;
 	struct leaf *l;
@@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, str
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 
-	pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
 
 	mask = ntohl(inet_make_mask(plen));
 
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, str
 
 	key = key & mask;
 
-	fi = fib_create_info(r, rta, nlhdr, &err);
-
-	if (!fi)
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi)) {
+		err = PTR_ERR(fi);
 		goto err;
+	}
 
 	l = fib_find_node(t, key);
 	fa = NULL;
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, str
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, str
 			fi_drop = fa->fa_info;
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
-			new_fa->fa_type = type;
-			new_fa->fa_scope = r->rtm_scope;
+			new_fa->fa_type = cfg->fc_type;
+			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state &= ~FA_S_ACCESSED;
 
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, str
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				goto out;
 			}
 		}
-		if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 	err = -ENOENT;
-	if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, str
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, str
 			  (fa ? &fa->fa_list : fa_head));
 
 	rt_cache_flush(-1);
-	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 succeeded:
 	return 0;
 
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie 
 	return 1;
 }
 
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-		struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	u32 key, mask;
-	int plen = r->rtm_dst_len;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
 	struct list_head *fa_head;
 	struct leaf *l;
 	struct leaf_info *li;
 
-
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 	mask = ntohl(inet_make_mask(plen));
 
 	if (key & ~mask)
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, str
 		if (fa->fa_tos != tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, nlhdr, rta, fi) == 0) {
+		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, str
 		return -ESRCH;
 
 	fa = fa_to_delete;
-	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 
 	l = fib_find_node(t, key);
 	li = find_leaf_info(l, plen);
@@ -1848,7 +1836,7 @@ static int fn_trie_dump_fa(t_key key, in
 
 	u32 xkey = htonl(key);
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, in
 				  tb->tb_id,
 				  fa->fa_type,
 				  fa->fa_scope,
-				  &xkey,
+				  xkey,
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, 0) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 		i++;
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie
 	struct list_head *fa_head;
 	struct leaf *l = NULL;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 
 		fa_head = get_fa_head(l, plen);
 
@@ -1904,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie
 			continue;
 
 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table
 	int m, s_m;
 	struct trie *t = (struct trie *) tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 
 	rcu_read_lock();
 	for (m = 0; m <= 32; m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-				sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+				sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			goto out;
 		}
 	}
 	rcu_read_unlock();
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 out:
 	rcu_read_unlock();
@@ -1943,9 +1931,9 @@ out:
 /* Fix more generic FIB names for init later */
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
diff -puN net/ipv4/icmp.c~git-net net/ipv4/icmp.c
--- a/net/ipv4/icmp.c~git-net
+++ a/net/ipv4/icmp.c
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
 };
 
 /* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
 
 /*
  * 	Configurable global rate limit.
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_respo
  *	time exceeded (11), parameter problem (12)
  */
 
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *
 						.saddr = rt->rt_spec_dst,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    .proto = IPPROTO_ICMP };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, i
 				}
 			}
 		};
+		security_skb_classify_flow(skb_in, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
 	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
diff -puN net/ipv4/igmp.c~git-net net/ipv4/igmp.c
--- a/net/ipv4/igmp.c~git-net
+++ a/net/ipv4/igmp.c
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
 		goto drop;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev
 /*
  *	Join a socket to a group
  */
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
 
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff -puN net/ipv4/inet_connection_sock.c~git-net net/ipv4/inet_connection_sock.c
--- a/net/ipv4/inet_connection_sock.c~git-net
+++ a/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(str
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
 
+	security_req_classify_flow(req, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock 
 
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+		security_inet_csk_clone(newsk, req);
 	}
 	return newsk;
 }
diff -puN net/ipv4/inet_hashtables.c~git-net net/ipv4/inet_hashtables.c
--- a/net/ipv4/inet_hashtables.c~git-net
+++ a/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+					      const u32 daddr,
+					      const unsigned short hnum,
+					      const int dif)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(cons
 	return result;
 }
 
+/* Optimize the common listener case. */
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+				    const u32 daddr, const unsigned short hnum,
+				    const int dif)
+{
+	struct sock *sk = NULL;
+	const struct hlist_head *head;
+
+	read_lock(&hashinfo->lhash_lock);
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	if (!hlist_empty(head)) {
+		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+		if (inet->num == hnum && !sk->sk_node.next &&
+		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+		    !sk->sk_bound_dev_if)
+			goto sherry_cache;
+		sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+	}
+	if (sk) {
+sherry_cache:
+		sock_hold(sk);
+	}
+	read_unlock(&hashinfo->lhash_lock);
+	return sk;
+}
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
 /* called with local bh disabled */
diff -puN net/ipv4/ip_fragment.c~git-net net/ipv4/ip_fragment.c
--- a/net/ipv4/ip_fragment.c~git-net
+++ a/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
  * even the most extreme cases without allowing an attacker to measurably
  * harm machine performance.
  */
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
 
 struct ipfrag_skb_cb
 {
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u3
 }
 
 static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ipfrag_secret_rebuild(unsigned long dummy)
 {
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(str
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip_frag_mem);
diff -puN net/ipv4/ip_gre.c~git-net net/ipv4/ip_gre.c
--- a/net/ipv4/ip_gre.c~git-net
+++ a/net/ipv4/ip_gre.c
@@ -576,7 +576,7 @@ static int ipgre_rcv(struct sk_buff *skb
 
 		if (flags&GRE_CSUM) {
 			switch (skb->ip_summed) {
-			case CHECKSUM_HW:
+			case CHECKSUM_COMPLETE:
 				csum = (u16)csum_fold(skb->csum);
 				if (!csum)
 					break;
@@ -584,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb
 			case CHECKSUM_NONE:
 				skb->csum = 0;
 				csum = __skb_checksum_complete(skb);
-				skb->ip_summed = CHECKSUM_HW;
+				skb->ip_summed = CHECKSUM_COMPLETE;
 			}
 			offset += 4;
 		}
diff -puN net/ipv4/ip_options.c~git-net net/ipv4/ip_options.c
--- a/net/ipv4/ip_options.c~git-net
+++ a/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/route.h>
+#include <net/cipso_ipv4.h>
 
 /* 
  * Write options to IP header, record destination address to
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * 
 			dopt->is_strictroute = sopt->is_strictroute;
 		}
 	}
+	if (sopt->cipso) {
+		optlen  = sptr[sopt->cipso+1];
+		dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->cipso, optlen);
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
 	while (dopt->optlen & 3) {
 		*dptr++ = IPOPT_END;
 		dopt->optlen++;
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options
 			if (optptr[2] == 0 && optptr[3] == 0)
 				opt->router_alert = optptr - iph;
 			break;
+		      case IPOPT_CIPSO:
+		        if (opt->cipso) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			opt->cipso = optptr - iph;
+		        if (cipso_v4_validate(&optptr)) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			break;
 		      case IPOPT_SEC:
 		      case IPOPT_SID:
 		      default:
@@ -506,7 +525,6 @@ static int ip_options_get_finish(struct 
 		opt->__data[optlen++] = IPOPT_END;
 	opt->optlen = optlen;
 	opt->is_data = 1;
-	opt->is_setbyuser = 1;
 	if (optlen && ip_options_compile(opt, NULL)) {
 		kfree(opt);
 		return -EINVAL;
diff -puN net/ipv4/ip_output.c~git-net net/ipv4/ip_output.c
--- a/net/ipv4/ip_output.c~git-net
+++ a/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, i
 			 * keep trying until route appears or the connection times
 			 * itself out.
 			 */
+			security_sk_classify_flow(sk, &fl);
 			if (ip_route_output_flow(&rt, &fl, sk, 0))
 				goto no_route;
 		}
@@ -678,7 +679,7 @@ ip_generic_getfrag(void *from, char *to,
 {
 	struct iovec *iov = from;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
@@ -734,7 +735,7 @@ static inline int ip_ufo_append_data(str
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
@@ -842,7 +843,7 @@ int ip_append_data(struct sock *sk,
 	    length + fragheaderlen <= mtu &&
 	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
 	    !exthdrlen)
-		csummode = CHECKSUM_HW;
+		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -1365,6 +1366,7 @@ void ip_send_reply(struct sock *sk, stru
 					       { .sport = skb->h.th->dest,
 					         .dport = skb->h.th->source } },
 				    .proto = sk->sk_protocol };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			return;
 	}
diff -puN net/ipv4/ipconfig.c~git-net net/ipv4/ipconfig.c
--- a/net/ipv4/ipconfig.c~git-net
+++ a/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
  *              --  Josef Siemes <jsiemes@web.de>, Aug 2002
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
diff -puN net/ipv4/ipmr.c~git-net net/ipv4/ipmr.c
--- a/net/ipv4/ipmr.c~git-net
+++ a/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mf
 			e = NLMSG_DATA(nlh);
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
-			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mf
 
 	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mf
 				e->error = -EMSGSIZE;
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
-			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
diff -puN net/ipv4/ipvs/ip_vs_proto_tcp.c~git-net net/ipv4/ipvs/ip_vs_proto_tcp.c
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c~git-net
+++ a/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, stru
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
 				      skb->len - tcphoff,
 				      skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, stru
 		}
 		break;
 	default:
-		/* CHECKSUM_UNNECESSARY */
+		/* No need to checksum. */
 		break;
 	}
 
diff -puN net/ipv4/ipvs/ip_vs_proto_udp.c~git-net net/ipv4/ipvs/ip_vs_proto_udp.c
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c~git-net
+++ a/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, stru
 		case CHECKSUM_NONE:
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
-		case CHECKSUM_HW:
+		case CHECKSUM_COMPLETE:
 			if (csum_tcpudp_magic(skb->nh.iph->saddr,
 					      skb->nh.iph->daddr,
 					      skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, stru
 			}
 			break;
 		default:
-			/* CHECKSUM_UNNECESSARY */
+			/* No need to checksum. */
 			break;
 		}
 	}
diff -puN net/ipv4/netfilter.c~git-net net/ipv4/netfilter.c
--- a/net/ipv4/netfilter.c~git-net
+++ a/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_bu
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
 			break;
 		if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff -puN net/ipv4/netfilter/ip_conntrack_netbios_ns.c~git-net net/ipv4/netfilter/ip_conntrack_netbios_ns.c
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c~git-net
+++ a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/if_addr.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/route.h>
diff -puN net/ipv4/netfilter/ip_conntrack_proto_tcp.c~git-net net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c~git-net
+++ a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -865,8 +865,7 @@ static int tcp_error(struct sk_buff *skb
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because it is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff -puN net/ipv4/netfilter/ip_conntrack_proto_udp.c~git-net net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c~git-net
+++ a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff -puN net/ipv4/netfilter/ip_conntrack_sip.c~git-net net/ipv4/netfilter/ip_conntrack_sip.c
--- a/net/ipv4/netfilter/ip_conntrack_sip.c~git-net
+++ a/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/skbuff.h>
diff -puN net/ipv4/netfilter/ip_nat_core.c~git-net net/ipv4/netfilter/ip_nat_core.c
--- a/net/ipv4/netfilter/ip_nat_core.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_core.c
@@ -101,18 +101,6 @@ static void ip_nat_cleanup_conntrack(str
 	write_unlock_bh(&ip_nat_lock);
 }
 
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong.  Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-				      oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
 /* Is this tuple already taken? (not by us) */
 int
 ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +366,12 @@ manip_pkt(u_int16_t proto,
 	iph = (void *)(*pskb)->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+					    iph->check);
 		iph->saddr = target->src.ip;
 	} else {
-		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+					    iph->check);
 		iph->daddr = target->dst.ip;
 	}
 	return 1;
@@ -423,10 +411,10 @@ unsigned int ip_nat_packet(struct ip_con
 EXPORT_SYMBOL_GPL(ip_nat_packet);
 
 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_nat_manip_type manip,
-				  enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum,
+				  struct sk_buff **pskb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -434,7 +422,9 @@ int ip_nat_icmp_reply_translation(struct
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
+	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -443,12 +433,8 @@ int ip_nat_icmp_reply_translation(struct
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
-		hdrlen = (*pskb)->nh.iph->ihl * 4;
-		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
-						(*pskb)->len - hdrlen, 0)))
-			return 0;
-	}
+	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+		return 0;
 
 	/* Must be RELATED */
 	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +473,14 @@ int ip_nat_icmp_reply_translation(struct
 		       !manip))
 		return 0;
 
-	/* Reloading "inside" here since manip_pkt inner. */
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	inside->icmp.checksum = 0;
-	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-						       (*pskb)->len - hdrlen,
-						       0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt inner. */
+		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+							       (*pskb)->len - hdrlen,
+							       0));
+	}
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
diff -puN net/ipv4/netfilter/ip_nat_helper.c~git-net net/ipv4/netfilter/ip_nat_helper.c
--- a/net/ipv4/netfilter/ip_nat_helper.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_helper.c
@@ -165,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff 
 {
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	int datalen;
+	int oldlen, datalen;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return 0;
@@ -180,13 +180,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff 
 	iph = (*pskb)->nh.iph;
 	tcph = (void *)iph + iph->ihl*4;
 
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = (*pskb)->len - iph->ihl*4;
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
-				   csum_partial((char *)tcph, datalen, 0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		tcph->check = 0;
+		tcph->check = tcp_v4_check(tcph, datalen,
+					   iph->saddr, iph->daddr,
+					   csum_partial((char *)tcph,
+					   		datalen, 0));
+	} else
+		tcph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   tcph->check, 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +230,7 @@ ip_nat_mangle_udp_packet(struct sk_buff 
 {
 	struct iphdr *iph;
 	struct udphdr *udph;
+	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
 	iph = (*pskb)->nh.iph;
@@ -238,22 +248,32 @@ ip_nat_mangle_udp_packet(struct sk_buff 
 
 	iph = (*pskb)->nh.iph;
 	udph = (void *)iph + iph->ihl*4;
+
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	udph->len = htons((*pskb)->len - iph->ihl*4);
+	datalen = (*pskb)->len - iph->ihl*4;
+	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (udph->check) {
-		int datalen = (*pskb)->len - iph->ihl * 4;
+	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+		return 1;
+
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		udph->check = 0;
 		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 		                                datalen, IPPROTO_UDP,
 		                                csum_partial((char *)udph,
 		                                             datalen, 0));
-	}
-
+		if (!udph->check)
+			udph->check = -1;
+	} else
+		udph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   udph->check, 1);
 	return 1;
 }
 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +313,14 @@ sack_adjust(struct sk_buff *skb,
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		tcph->check = 
-			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
-					   ip_nat_cheat_check(~sack->end_seq, 
-						   	      new_end_seq,
-							      tcph->check));
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->start_seq,
+						   new_start_seq,
+						   tcph->check, 0);
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->end_seq,
+						   new_end_seq,
+						   tcph->check, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -381,10 +404,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
 	newack = htonl(newack);
 
-	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
-					 ip_nat_cheat_check(~tcph->ack_seq, 
-					 		    newack, 
-							    tcph->check));
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+					   tcph->check, 0);
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+					   tcph->check, 0);
 
 	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff -puN net/ipv4/netfilter/ip_nat_proto_gre.c~git-net net/ipv4/netfilter/ip_nat_proto_gre.c
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -130,9 +130,10 @@ gre_manip_pkt(struct sk_buff **pskb,
 			if (greh->csum) {
 				/* FIXME: Never tested this code... */
 				*(gre_csum(greh)) = 
-					ip_nat_cheat_check(~*(gre_key(greh)),
+					nf_proto_csum_update(*pskb,
+							~*(gre_key(greh)),
 							tuple->dst.u.gre.key,
-							*(gre_csum(greh)));
+							*(gre_csum(greh)), 0);
 			}
 			*(gre_key(greh)) = tuple->dst.u.gre.key;
 			break;
diff -puN net/ipv4/netfilter/ip_nat_proto_icmp.c~git-net net/ipv4/netfilter/ip_nat_proto_icmp.c
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
 		return 0;
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
-	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
-					    tuple->src.u.icmp.id,
-					    hdr->checksum);
+	hdr->checksum = nf_proto_csum_update(*pskb,
+					     hdr->un.echo.id ^ 0xFFFF,
+					     tuple->src.u.icmp.id,
+					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff -puN net/ipv4/netfilter/ip_nat_proto_tcp.c~git-net net/ipv4/netfilter/ip_nat_proto_tcp.c
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(oldport ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+					  hdr->check, 0);
 	return 1;
 }
 
diff -puN net/ipv4/netfilter/ip_nat_proto_udp.c~git-net net/ipv4/netfilter/ip_nat_proto_udp.c
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check) /* 0 is a special case meaning no checksum */
-		hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(*portptr ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+
+	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+						  hdr->check, 1);
+		hdr->check = nf_proto_csum_update(*pskb,
+						  *portptr ^ 0xFFFF, newport,
+						  hdr->check, 0);
+		if (!hdr->check)
+			hdr->check = -1;
+	}
 	*portptr = newport;
 	return 1;
 }
diff -puN net/ipv4/netfilter/ip_nat_standalone.c~git-net net/ipv4/netfilter/ip_nat_standalone.c
--- a/net/ipv4/netfilter/ip_nat_standalone.c~git-net
+++ a/net/ipv4/netfilter/ip_nat_standalone.c
@@ -110,11 +110,6 @@ ip_nat_fn(unsigned int hooknum,
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
-	/* If we had a hardware checksum before, it's now invalid */
-	if ((*pskb)->ip_summed == CHECKSUM_HW)
-		if (skb_checksum_help(*pskb, (out == NULL)))
-			return NF_DROP;
-
 	ct = ip_conntrack_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
@@ -145,8 +140,8 @@ ip_nat_fn(unsigned int hooknum,
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
-							   CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+							   hooknum, pskb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
diff -puN net/ipv4/netfilter/ip_queue.c~git-net net/ipv4/netfilter/ip_queue.c
--- a/net/ipv4/netfilter/ip_queue.c~git-net
+++ a/net/ipv4/netfilter/ip_queue.c
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queu
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff -puN net/ipv4/netfilter/ipt_ECN.c~git-net net/ipv4/netfilter/ipt_ECN.c
--- a/net/ipv4/netfilter/ipt_ECN.c~git-net
+++ a/net/ipv4/netfilter/ipt_ECN.c
@@ -49,10 +49,10 @@ set_ect_ip(struct sk_buff **pskb, const 
 
 /* Return 0 if there was an error. */
 static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
-	u_int16_t diffs[2];
+	u_int16_t oldval;
 
 	/* Not enought header? */
 	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +70,16 @@ set_ect_tcp(struct sk_buff **pskb, const
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, inward))
-		return 0;
-
-	diffs[0] = ((u_int16_t *)tcph)[6];
+	oldval = ((u_int16_t *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
 		tcph->ece = einfo->proto.tcp.ece;
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
-	diffs[1] = ((u_int16_t *)tcph)[6];
-	diffs[0] = diffs[0] ^ 0xFFFF;
 
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
-		tcph->check = csum_fold(csum_partial((char *)diffs,
-						     sizeof(diffs),
-						     tcph->check^0xFFFF));
+	tcph->check = nf_proto_csum_update((*pskb),
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 	return 1;
 }
 
@@ -106,7 +100,7 @@ target(struct sk_buff **pskb,
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
 	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
-		if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
 	return IPT_CONTINUE;
diff -puN net/ipv4/netfilter/ipt_REJECT.c~git-net net/ipv4/netfilter/ipt_REJECT.c
--- a/net/ipv4/netfilter/ipt_REJECT.c~git-net
+++ a/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@ static inline struct rtable *route_rever
 	fl.proto = IPPROTO_TCP;
 	fl.fl_ip_sport = tcph->dest;
 	fl.fl_ip_dport = tcph->source;
+	security_skb_classify_flow(skb, &fl);
 
 	xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
 
@@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *o
 	tcph->urg_ptr = 0;
 
 	/* Adjust TCP checksum */
+	nskb->ip_summed = CHECKSUM_NONE;
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
 				   nskb->nh.iph->saddr,
diff -puN net/ipv4/netfilter/ipt_TCPMSS.c~git-net net/ipv4/netfilter/ipt_TCPMSS.c
--- a/net/ipv4/netfilter/ipt_TCPMSS.c~git-net
+++ a/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -27,14 +27,6 @@ MODULE_DESCRIPTION("iptables TCP MSS mod
 #define DEBUGP(format, args...)
 #endif
 
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-                                      oldcheck^0xFFFF));
-}
-
 static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
@@ -62,10 +54,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, out == NULL))
-		return NF_DROP;
-
 	iph = (*pskb)->nh.iph;
 	tcplen = (*pskb)->len - iph->ihl*4;
 
@@ -119,9 +107,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = (newmss & 0x00ff);
 
-			tcph->check = cheat_check(htons(oldmss)^0xFFFF,
-						  htons(newmss),
-						  tcph->check);
+			tcph->check = nf_proto_csum_update(*pskb,
+							   htons(oldmss)^0xFFFF,
+							   htons(newmss),
+							   tcph->check, 0);
 
 			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
 			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
@@ -161,8 +150,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
  	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
-				  htons(tcplen + TCPOLEN_MSS), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   htons(tcplen) ^ 0xFFFF,
+				           htons(tcplen + TCPOLEN_MSS),
+					   tcph->check, 1);
 	tcplen += TCPOLEN_MSS;
 
 	opt[0] = TCPOPT_MSS;
@@ -170,16 +161,19 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = (newmss & 0x00ff);
 
-	tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+					   tcph->check, 0);
 
 	oldval = ((u_int16_t *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	tcph->check = cheat_check(oldval ^ 0xFFFF,
-				  ((u_int16_t *)tcph)[6], tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 
 	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
-				 newtotlen, iph->check);
+	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
 
 	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
diff -puN net/ipv4/netfilter/ipt_hashlimit.c~git-net net/ipv4/netfilter/ipt_hashlimit.c
--- a/net/ipv4/netfilter/ipt_hashlimit.c~git-net
+++ a/net/ipv4/netfilter/ipt_hashlimit.c
@@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *sk
 		dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * 
 							hinfo->cfg.burst);
 		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
-		spin_unlock_bh(&hinfo->lock);
-		return 1;
+	} else {
+		/* update expiration timeout */
+		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+		rateinfo_recalc(dh, now);
 	}
 
-	/* update expiration timeout */
-	dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
-	rateinfo_recalc(dh, now);
 	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 		/* We're underlimit. */
 		dh->rateinfo.credit -= dh->rateinfo.cost;
diff -puN net/ipv4/proc.c~git-net net/ipv4/proc.c
--- a/net/ipv4/proc.c~git-net
+++ a/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_l
 	SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff -puN net/ipv4/raw.c~git-net net/ipv4/raw.c
--- a/net/ipv4/raw.c~git-net
+++ a/net/ipv4/raw.c
@@ -38,8 +38,7 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
- 
-#include <linux/config.h> 
+
 #include <linux/types.h>
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *ioc
 		if (!inet->hdrincl)
 			raw_probe_proto_opt(&fl, msg);
 
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 	}
 	if (err)
diff -puN net/ipv4/route.c~git-net net/ipv4/route.c
--- a/net/ipv4/route.c~git-net
+++ a/net/ipv4/route.c
@@ -2639,51 +2639,54 @@ static int rt_fill_info(struct sk_buff *
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	struct nlmsghdr *nlh;
 	struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
-	struct rtattr *eptr;
-#endif
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
+
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	r = nlmsg_data(nlh);
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
+	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
-	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+	NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
+
 	if (rt->fl.fl4_src) {
 		r->rtm_src_len = 32;
-		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+		NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (rt->u.dst.tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
-		__u32 alg = rt->rt_multipath_alg;
-
-		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
-	}
+	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
 #endif
 	if (rt->fl.iif)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
+
 	if (rt->rt_dst != rt->rt_gateway)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+		NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	ci.rta_used	= rt->u.dst.__use;
 	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
@@ -2700,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *
 			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
 		}
 	}
-#ifdef CONFIG_IP_MROUTE
-	eptr = (struct rtattr*)skb->tail;
-#endif
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
 	if (rt->fl.iif) {
 #ifdef CONFIG_IP_MROUTE
 		u32 dst = rt->rt_dst;
@@ -2715,41 +2715,46 @@ static int rt_fill_info(struct sk_buff *
 				if (!nowait) {
 					if (err == 0)
 						return 0;
-					goto nlmsg_failure;
+					goto nla_put_failure;
 				} else {
 					if (err == -EMSGSIZE)
-						goto nlmsg_failure;
-					((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+						goto nla_put_failure;
+					ci.rta_error = err;
 				}
 			}
 		} else
 #endif
-			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
-	u32 dst = 0;
-	u32 src = 0;
-	int iif = 0;
-	int err = -ENOBUFS;
+	u32 dst, src, iif;
+	int err;
 	struct sk_buff *skb;
 
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		goto out;
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
@@ -2760,62 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in
 	skb->nh.iph->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
-	if (rta[RTA_SRC - 1])
-		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
-	if (rta[RTA_DST - 1])
-		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
-	if (rta[RTA_IIF - 1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+	src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
+	dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
+	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 
 	if (iif) {
-		struct net_device *dev = __dev_get_by_index(iif);
-		err = -ENODEV;
-		if (!dev)
-			goto out_free;
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(iif);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto errout_free;
+		}
+
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
-		rt = (struct rtable*)skb->dst;
-		if (!err && rt->u.dst.error)
+
+		rt = (struct rtable*) skb->dst;
+		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
-							 .saddr = src,
-							 .tos = rtm->rtm_tos } } };
-		int oif = 0;
-		if (rta[RTA_OIF - 1])
-			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		fl.oif = oif;
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = dst,
+					.saddr = src,
+					.tos = rtm->rtm_tos,
+				},
+			},
+			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+		};
 		err = ip_route_output_key(&rt, &fl);
 	}
+
 	if (err)
-		goto out_free;
+		goto errout_free;
 
 	skb->dst = &rt->u.dst;
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 				RTM_NEWROUTE, 0, 0);
-	if (!err)
-		goto out_free;
-	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
+	if (err <= 0)
+		goto errout_free;
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:	return err;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
+	return err;
 
-out_free:
+errout_free:
 	kfree_skb(skb);
-	goto out;
+	goto errout;
 }
 
 int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
@@ -3157,7 +3161,7 @@ int __init ip_rt_init(void)
 					rhash_entries,
 					(num_physpages >= 128 * 1024) ?
 					15 : 17,
-					HASH_HIGHMEM,
+					0,
 					&rt_hash_log,
 					&rt_hash_mask,
 					0);
diff -puN net/ipv4/syncookies.c~git-net net/ipv4/syncookies.c
--- a/net/ipv4/syncookies.c~git-net
+++ a/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock
 	if (!req)
 		goto out;
 
+	if (security_inet_conn_request(sk, skb, req)) {
+		reqsk_free(req);
+		goto out;
+	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
 	treq->rcv_isn		= htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
 						 .dport = skb->h.th->source } } };
+		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
 			goto out; 
diff -puN net/ipv4/sysctl_net_ipv4.c~git-net net/ipv4/sysctl_net_ipv4.c
--- a/net/ipv4/sysctl_net_ipv4.c~git-net
+++ a/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/cipso_ipv4.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+#ifdef CONFIG_NETLABEL
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
+		.procname	= "cipso_cache_enable",
+		.data		= &cipso_v4_cache_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_BUCKET_SIZE,
+		.procname	= "cipso_cache_bucket_size",
+		.data		= &cipso_v4_cache_bucketsize,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_OPTFMT,
+		.procname	= "cipso_rbm_optfmt",
+		.data		= &cipso_v4_rbm_optfmt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_STRICTVALID,
+		.procname	= "cipso_rbm_strictvalid",
+		.data		= &cipso_v4_rbm_strictvalid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif /* CONFIG_NETLABEL */
 	{ .ctl_name = 0 }
 };
 
diff -puN net/ipv4/tcp.c~git-net net/ipv4/tcp.c
--- a/net/ipv4/tcp.c~git-net
+++ a/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
@@ -568,7 +568,7 @@ new_segment:
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
 		sk->sk_forward_alloc -= copy;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
 		skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@ new_segment:
 				 * Check whether we can use HW checksum.
 				 */
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_PARTIAL;
 
 				skb_entail(sk, tp, skb);
 				copy = size_goal;
@@ -2205,7 +2205,7 @@ struct sk_buff *tcp_tso_segment(struct s
 		th->fin = th->psh = 0;
 
 		th->check = ~csum_fold(th->check + delta);
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 							   skb->csum));
 
@@ -2219,7 +2219,7 @@ struct sk_buff *tcp_tso_segment(struct s
 
 	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
 	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
diff -puN net/ipv4/tcp_input.c~git-net net/ipv4/tcp_input.c
--- a/net/ipv4/tcp_input.c~git-net
+++ a/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
-
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
+
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
 
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc = 1;
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly = 1;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
diff -puN net/ipv4/tcp_ipv4.c~git-net net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c~git-net
+++ a/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
 
 /* Check TCP sequence numbers in ICMP packets. */
 #define ICMP_MIN_LENGTH 8
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, 
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff
 	th->check = 0;
 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk,
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
@@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struc
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-					th->source, skb->nh.iph->daddr,
-					ntohs(th->dest), inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+				      th->source, skb->nh.iph->daddr,
+				      th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struc
 
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
 				  skb->nh.iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, ntohs(th->dest),
+			   skb->nh.iph->daddr, th->dest,
 			   inet_iif(skb));
 
 	if (!sk)
@@ -1165,7 +1168,7 @@ do_time_wait:
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
 							skb->nh.iph->daddr,
-							ntohs(th->dest),
+							th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
diff -puN net/ipv4/tcp_lp.c~git-net net/ipv4/tcp_lp.c
--- a/net/ipv4/tcp_lp.c~git-net
+++ a/net/ipv4/tcp_lp.c
@@ -31,7 +31,6 @@
  * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 
diff -puN net/ipv4/tcp_minisocks.c~git-net net/ipv4/tcp_minisocks.c
--- a/net/ipv4/tcp_minisocks.c~git-net
+++ a/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
 #define SYNC_INIT 1
 #endif
 
-int sysctl_tcp_syncookies = SYNC_INIT; 
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
diff -puN net/ipv4/tcp_output.c~git-net net/ipv4/tcp_output.c
--- a/net/ipv4/tcp_output.c~git-net
+++ a/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
 #include <linux/smp_lock.h>
 
 /* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
 /* People can turn this on to  work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 
 /* This limits the percentage of the congestion window which we
  * will allow a single TSO frame to consume.  Building TSO frames
  * which are too large can cause TCP streams to be bursty.
  */
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
 
 /* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
@@ -466,7 +466,8 @@ static int tcp_transmit_skb(struct sock 
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, skb, sk);
 
-	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
+		TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (likely(err <= 0))
@@ -575,7 +576,7 @@ int tcp_fragment(struct sock *sk, struct
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
 		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
 						       nsize, 0);
@@ -584,7 +585,7 @@ int tcp_fragment(struct sock *sk, struct
 
 		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
 	} else {
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb_split(skb, buff, len);
 	}
 
@@ -687,7 +688,7 @@ int tcp_trim_head(struct sock *sk, struc
 		__pskb_trim_head(skb, len - skb_headlen(skb));
 
 	TCP_SKB_CB(skb)->seq += len;
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	skb->truesize	     -= len;
 	sk->sk_wmem_queued   -= len;
@@ -1060,7 +1061,7 @@ static int tso_fragment(struct sock *sk,
 	/* This packet was never sent out yet, so no SACK bits. */
 	TCP_SKB_CB(buff)->sacked = 0;
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 
 	/* Fix up tso_factor for both original and new SKB.  */
@@ -1204,8 +1205,7 @@ static int tcp_mtu_probe(struct sock *sk
 	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
-		nskb->ip_summed = CHECKSUM_HW;
+	nskb->ip_summed = skb->ip_summed;
 
 	len = 0;
 	while (len < probe_size) {
@@ -1229,7 +1229,7 @@ static int tcp_mtu_probe(struct sock *sk
 			                           ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_HW)
+				if (skb->ip_summed != CHECKSUM_PARTIAL)
 					skb->csum = csum_partial(skb->data, skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
@@ -1570,10 +1570,9 @@ static void tcp_retrans_try_collapse(str
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
-		if (next_skb->ip_summed == CHECKSUM_HW)
-			skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = next_skb->ip_summed;
 
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
 		/* Update sequence range on original skb. */
@@ -2157,10 +2156,9 @@ int tcp_connect(struct sock *sk)
 	skb_shinfo(buff)->gso_size = 0;
 	skb_shinfo(buff)->gso_type = 0;
 	buff->csum = 0;
+	tp->snd_nxt = tp->write_seq;
 	TCP_SKB_CB(buff)->seq = tp->write_seq++;
 	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
-	tp->snd_nxt = tp->write_seq;
-	tp->pushed_seq = tp->write_seq;
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2170,6 +2168,12 @@ int tcp_connect(struct sock *sk)
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+
+	/* We change tp->snd_nxt after the tcp_transmit_skb() call
+	 * in order to make this packet get counted in tcpOutSegs.
+	 */
+	tp->snd_nxt = tp->write_seq;
+	tp->pushed_seq = tp->write_seq;
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
diff -puN net/ipv4/tcp_probe.c~git-net net/ipv4/tcp_probe.c
--- a/net/ipv4/tcp_probe.c~git-net
+++ a/net/ipv4/tcp_probe.c
@@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file
 	error = wait_event_interruptible(tcpw.wait,
 					 __kfifo_len(tcpw.fifo) != 0);
 	if (error)
-		return error;
+		goto out_free;
 
 	cnt = kfifo_get(tcpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt);
 
+out_free:
 	vfree(tbuf);
 
 	return error ? error : cnt;
diff -puN net/ipv4/tcp_timer.c~git-net net/ipv4/tcp_timer.c
--- a/net/ipv4/tcp_timer.c~git-net
+++ a/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
diff -puN net/ipv4/tcp_veno.c~git-net net/ipv4/tcp_veno.c
--- a/net/ipv4/tcp_veno.c~git-net
+++ a/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
  * 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff -puN net/ipv4/udp.c~git-net net/ipv4/udp.c
--- a/net/ipv4/udp.c~git-net
+++ a/net/ipv4/udp.c
@@ -429,7 +429,7 @@ static int udp_push_pending_frames(struc
 		/*
 		 * Only one fragment on the socket.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			skb->csum = offsetof(struct udphdr, check);
 			uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
 					up->len, IPPROTO_UDP, 0);
@@ -448,7 +448,7 @@ static int udp_push_pending_frames(struc
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			int offset = (unsigned char *)uh - skb->data;
 			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, stru
 				    .uli_u = { .ports =
 					       { .sport = inet->sport,
 						 .dport = dport } } };
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 		if (err)
 			goto out;
@@ -661,6 +662,16 @@ out:
 		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
@@ -980,6 +991,7 @@ static int udp_encap_rcv(struct sock * s
 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
+	int rc;
 
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
@@ -1026,7 +1038,10 @@ static int udp_queue_rcv_skb(struct sock
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -1087,7 +1102,7 @@ static void udp_checksum_init(struct sk_
 {
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_HW) {
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
diff -puN net/ipv4/xfrm4_output.c~git-net net/ipv4/xfrm4_output.c
--- a/net/ipv4/xfrm4_output.c~git-net
+++ a/net/ipv4/xfrm4_output.c
@@ -48,8 +48,8 @@ static int xfrm4_output_one(struct sk_bu
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
diff -puN net/ipv6/Kconfig~git-net net/ipv6/Kconfig
--- a/net/ipv6/Kconfig~git-net
+++ a/net/ipv6/Kconfig
@@ -135,3 +135,10 @@ config IPV6_TUNNEL
 
 	  If unsure, say N.
 
+config IPV6_MULTIPLE_TABLES
+	bool "IPv6: Multiple Routing Tables"
+	depends on IPV6 && EXPERIMENTAL
+	select FIB_RULES
+	---help---
+	  Support multiple routing tables.
+
diff -puN net/ipv6/Makefile~git-net net/ipv6/Makefile
--- a/net/ipv6/Makefile~git-net
+++ a/net/ipv6/Makefile
@@ -13,6 +13,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_ou
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
diff -puN net/ipv6/addrconf.c~git-net net/ipv6/addrconf.c
--- a/net/ipv6/addrconf.c~git-net
+++ a/net/ipv6/addrconf.c
@@ -48,6 +48,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_arcnet.h>
 #include <linux/if_infiniband.h>
@@ -72,6 +73,7 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 
@@ -144,7 +146,7 @@ static int ipv6_chk_same_addr(const stru
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -175,7 +177,7 @@ struct ipv6_devconf ipv6_devconf = {
 #endif
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -1525,7 +1527,7 @@ addrconf_prefix_route(struct in6_addr *p
 	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
 		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX);
 }
 
 /* Create "default" multicast route to the interface */
@@ -1542,7 +1544,7 @@ static void addrconf_add_mroute(struct n
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 	rtmsg.rtmsg_flags = RTF_UP;
 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL);
 }
 
 static void sit_route_add(struct net_device *dev)
@@ -1559,7 +1561,7 @@ static void sit_route_add(struct net_dev
 	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
 	rtmsg.rtmsg_ifindex	= dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN);
 }
 
 static void addrconf_add_lroute(struct net_device *dev)
@@ -1909,11 +1911,11 @@ static int inet6_addr_add(int ifindex, s
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
 	if (!IS_ERR(ifp)) {
-		spin_lock(&ifp->lock);
+		spin_lock_bh(&ifp->lock);
 		ifp->valid_lft = valid_lft;
 		ifp->prefered_lft = prefered_lft;
 		ifp->tstamp = jiffies;
-		spin_unlock(&ifp->lock);
+		spin_unlock_bh(&ifp->lock);
 
 		addrconf_dad_start(ifp, 0);
 		in6_ifa_put(ifp);
@@ -3267,9 +3269,7 @@ static int inet6_rtm_getaddr(struct sk_b
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 out:
 	in6_ifa_put(ifa);
 	return err;
@@ -3281,20 +3281,23 @@ out_free:
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
+	int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
 }
 
 static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3435,20 +3438,23 @@ static int inet6_dump_ifinfo(struct sk_b
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE);
+	int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
+	int err = -ENOBUFS;
 	
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
 }
 
 /* Maximum length of prefix_cacheinfo attributes */
@@ -3500,20 +3506,23 @@ static void inet6_prefix_notify(int even
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE);
+	int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
 }
 
 static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -3528,6 +3537,9 @@ static struct rtnetlink_link inet6_rtnet
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
 				      .dumpit	= inet6_dump_fib, },
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
+#endif
 };
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3653,7 +3665,7 @@ static struct addrconf_sysctl_table
 	ctl_table addrconf_conf_dir[2];
 	ctl_table addrconf_proto_dir[2];
 	ctl_table addrconf_root_dir[2];
-} addrconf_sysctl = {
+} addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
         	{
diff -puN net/ipv6/af_inet6.c~git-net net/ipv6/af_inet6.c
--- a/net/ipv6/af_inet6.c~git-net
+++ a/net/ipv6/af_inet6.c
@@ -67,7 +67,7 @@ MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only;
+int sysctl_ipv6_bindv6only __read_mostly;
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock 
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet->dport;
 		fl.fl_ip_sport = inet->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (np->opt && np->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
diff -puN net/ipv6/datagram.c~git-net net/ipv6/datagram.c
--- a/net/ipv6/datagram.c~git-net
+++ a/net/ipv6/datagram.c
@@ -156,6 +156,8 @@ ipv4_connected:
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
 
+	security_sk_classify_flow(sk, &fl);
+
 	if (flowlabel) {
 		if (flowlabel->opt && flowlabel->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
diff -puN net/ipv6/exthdrs.c~git-net net/ipv6/exthdrs.c
--- a/net/ipv6/exthdrs.c~git-net
+++ a/net/ipv6/exthdrs.c
@@ -294,7 +294,7 @@ looped_back:
 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
 	i = n - --hdr->segments_left;
diff -puN /dev/null net/ipv6/fib6_rules.c
--- /dev/null
+++ a/net/ipv6/fib6_rules.c
@@ -0,0 +1,258 @@
+/*
+ * net/ipv6/fib6_rules.c	IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ *	Thomas Graf		<tgraf@suug.ch>
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule
+{
+	struct fib_rule		common;
+	struct rt6key		src;
+	struct rt6key		dst;
+	u8			tclass;
+};
+
+static struct fib_rules_ops fib6_rules_ops;
+
+static struct fib6_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_MAIN,
+	},
+};
+
+static struct fib6_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_LOCAL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
+};
+
+static LIST_HEAD(fib6_rules);
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	struct fib_lookup_arg arg = {
+		.lookup_ptr = lookup,
+	};
+
+	fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
+	if (arg.rule)
+		fib_rule_put(arg.rule);
+
+	if (arg.result)
+		return (struct dst_entry *) arg.result;
+
+	dst_hold(&ip6_null_entry.u.dst);
+	return &ip6_null_entry.u.dst;
+}
+
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+	pol_lookup_t lookup = arg->lookup_ptr;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		rt = &ip6_null_entry;
+		goto discard_pkt;
+	default:
+	case FR_ACT_BLACKHOLE:
+		rt = &ip6_blk_hole_entry;
+		goto discard_pkt;
+	case FR_ACT_PROHIBIT:
+		rt = &ip6_prohibit_entry;
+		goto discard_pkt;
+	}
+
+	table = fib6_get_table(rule->table);
+	if (table)
+		rt = lookup(table, flp, flags);
+
+	if (rt != &ip6_null_entry)
+		goto out;
+	dst_release(&rt->u.dst);
+	rt = NULL;
+	goto out;
+
+discard_pkt:
+	dst_hold(&rt->u.dst);
+out:
+	arg->result = rt;
+	return rt == NULL ? -EAGAIN : 0;
+}
+
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib6_rule *r = (struct fib6_rule *) rule;
+
+	if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+		return 0;
+
+	if ((flags & RT6_F_HAS_SADDR) &&
+	    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		return 0;
+
+	return 1;
+}
+
+static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_TABLE]	= { .type = NLA_U32 },
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len > 128 || frh->dst_len > 128 ||
+	    (frh->tos & ~IPV6_FLOWINFO_MASK))
+		goto errout;
+
+	if (rule->action == FR_ACT_TO_TBL) {
+		if (rule->table == RT6_TABLE_UNSPEC)
+			goto errout;
+
+		if (fib6_new_table(rule->table) == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+	}
+
+	if (tb[FRA_SRC])
+		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+			   sizeof(struct in6_addr));
+
+	if (tb[FRA_DST])
+		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+			   sizeof(struct in6_addr));
+
+	rule6->src.plen = frh->src_len;
+	rule6->dst.plen = frh->dst_len;
+	rule6->tclass = frh->tos;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len && (rule6->src.plen != frh->src_len))
+		return 0;
+
+	if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+		return 0;
+
+	if (frh->tos && (rule6->tclass != frh->tos))
+		return 0;
+
+	if (tb[FRA_SRC] &&
+	    nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	if (tb[FRA_DST] &&
+	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	return 1;
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	frh->family = AF_INET6;
+	frh->dst_len = rule6->dst.plen;
+	frh->src_len = rule6->src.plen;
+	frh->tos = rule6->tclass;
+
+	if (rule6->dst.plen)
+		NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
+			&rule6->dst.addr);
+
+	if (rule6->src.plen)
+		NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
+			&rule6->src.addr);
+
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET6);
+}
+
+static u32 fib6_rule_default_pref(void)
+{
+	return 0x3FFF;
+}
+
+static struct fib_rules_ops fib6_rules_ops = {
+	.family			= AF_INET6,
+	.rule_size		= sizeof(struct fib6_rule),
+	.action			= fib6_rule_action,
+	.match			= fib6_rule_match,
+	.configure		= fib6_rule_configure,
+	.compare		= fib6_rule_compare,
+	.fill			= fib6_rule_fill,
+	.default_pref		= fib6_rule_default_pref,
+	.nlgroup		= RTNLGRP_IPV6_RULE,
+	.policy			= fib6_rule_policy,
+	.rules_list		= &fib6_rules,
+	.owner			= THIS_MODULE,
+};
+
+void __init fib6_rules_init(void)
+{
+	list_add_tail(&local_rule.common.list, &fib6_rules);
+	list_add_tail(&main_rule.common.list, &fib6_rules);
+
+	fib_rules_register(&fib6_rules_ops);
+}
+
+void fib6_rules_cleanup(void)
+{
+	fib_rules_unregister(&fib6_rules_ops);
+}
diff -puN net/ipv6/icmp.c~git-net net/ipv6/icmp.c
--- a/net/ipv6/icmp.c~git-net
+++ a/net/ipv6/icmp.c
@@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff 
 	return 0;
 }
 
-static int sysctl_icmpv6_time = 1*HZ; 
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
 
 /* 
  * Check the ICMP output rate limit 
@@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, in
 	fl.oif = iif;
 	fl.fl_icmp_type = type;
 	fl.fl_icmp_code = code;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -604,7 +606,7 @@ static int icmpv6_rcv(struct sk_buff **p
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
 				     skb->csum))
 			break;
diff -puN net/ipv6/inet6_connection_sock.c~git-net net/ipv6/inet6_connection_sock.c
--- a/net/ipv6/inet6_connection_sock.c~git-net
+++ a/net/ipv6/inet6_connection_sock.c
@@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, 
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_sport = inet->sport;
 	fl.fl_ip_dport = inet->dport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
diff -puN net/ipv6/ip6_fib.c~git-net net/ipv6/ip6_fib.c
--- a/net/ipv6/ip6_fib.c~git-net
+++ a/net/ipv6/ip6_fib.c
@@ -26,6 +26,7 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #ifdef 	CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -68,8 +69,7 @@ struct fib6_cleaner_t
 	void *arg;
 };
 
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
@@ -81,6 +81,8 @@ DEFINE_RWLOCK(fib6_walker_lock);
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
 
 /*
  *	A routing update causes an increase of the serial number on the
@@ -93,13 +95,31 @@ static __u32 rt_sernum;
 
 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
 
-struct fib6_walker_t fib6_walker_list = {
+static struct fib6_walker_t fib6_walker_list = {
 	.prev	= &fib6_walker_list,
 	.next	= &fib6_walker_list, 
 };
 
 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
 
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next = fib6_walker_list.next;
+	w->prev = &fib6_walker_list;
+	w->next->prev = w;
+	w->prev->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next->prev = w->prev;
+	w->prev->next = w->next;
+	w->prev = w->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
 static __inline__ u32 fib6_new_sernum(void)
 {
 	u32 n = ++rt_sernum;
@@ -147,6 +167,253 @@ static __inline__ void rt6_release(struc
 		dst_free(&rt->u.dst);
 }
 
+static struct fib6_table fib6_main_tbl = {
+	.tb6_id		= RT6_TABLE_MAIN,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB_TABLE_HASHSZ 256
+#else
+#define FIB_TABLE_HASHSZ 1
+#endif
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+static void fib6_link_table(struct fib6_table *tb)
+{
+	unsigned int h;
+
+	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
+
+	/*
+	 * No protection necessary, this is the only list mutatation
+	 * operation, tables never disappear once they exist.
+	 */
+	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+static struct fib6_table fib6_local_tbl = {
+	.tb6_id		= RT6_TABLE_LOCAL,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root 	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
+static struct fib6_table *fib6_alloc_table(u32 id)
+{
+	struct fib6_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_ATOMIC);
+	if (table != NULL) {
+		table->tb6_id = id;
+		table->tb6_lock = RW_LOCK_UNLOCKED;
+		table->tb6_root.leaf = &ip6_null_entry;
+		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+	}
+
+	return table;
+}
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	struct fib6_table *tb;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	tb = fib6_get_table(id);
+	if (tb)
+		return tb;
+
+	tb = fib6_alloc_table(id);
+	if (tb != NULL)
+		fib6_link_table(tb);
+
+	return tb;
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
+		if (tb->tb6_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void __init fib6_tables_init(void)
+{
+	fib6_link_table(&fib6_main_tbl);
+	fib6_link_table(&fib6_local_tbl);
+}
+
+#else
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	return fib6_get_table(id);
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	return &fib6_main_tbl;
+}
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+}
+
+static void __init fib6_tables_init(void)
+{
+	fib6_link_table(&fib6_main_tbl);
+}
+
+#endif
+
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->u.next) {
+		res = rt6_dump_route(rt, w->args);
+		if (res < 0) {
+			/* Frame is full, suspend walking */
+			w->leaf = rt;
+			return 1;
+		}
+		BUG_TRAP(res!=0);
+	}
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w = (void*)cb->args[2];
+
+	if (w) {
+		cb->args[2] = 0;
+		kfree(w);
+	}
+	cb->done = (void*)cb->args[3];
+	cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+	fib6_dump_end(cb);
+	return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+			   struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w;
+	int res;
+
+	w = (void *)cb->args[2];
+	w->root = &table->tb6_root;
+
+	if (cb->args[4] == 0) {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res > 0)
+			cb->args[4] = 1;
+	} else {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk_continue(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res != 0) {
+			if (res < 0)
+				fib6_walker_unlink(w);
+			goto end;
+		}
+		fib6_walker_unlink(w);
+		cb->args[4] = 0;
+	}
+end:
+	return res;
+}
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct rt6_rtnl_dump_arg arg;
+	struct fib6_walker_t *w;
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	int res = 0;
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	w = (void *)cb->args[2];
+	if (w == NULL) {
+		/* New dump:
+		 *
+		 * 1. hook callback destructor.
+		 */
+		cb->args[3] = (long)cb->done;
+		cb->done = fib6_dump_done;
+
+		/*
+		 * 2. allocate and initialize walker.
+		 */
+		w = kzalloc(sizeof(*w), GFP_ATOMIC);
+		if (w == NULL)
+			return -ENOMEM;
+		w->func = fib6_dump_node;
+		cb->args[2] = (long)w;
+	}
+
+	arg.skb = skb;
+	arg.cb = cb;
+	w->args = &arg;
+
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
+			if (e < s_e)
+				goto next;
+			res = fib6_dump_table(tb, skb, cb);
+			if (res != 0)
+				goto out;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	res = res < 0 ? res : skb->len;
+	if (res <= 0)
+		fib6_dump_end(cb);
+	return res;
+}
 
 /*
  *	Routing Table
@@ -925,7 +1192,7 @@ int fib6_del(struct rt6_info *rt, struct
  *	<0  -> walk is terminated by an error.
  */
 
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
 {
 	struct fib6_node *fn, *pn;
 
@@ -999,7 +1266,7 @@ int fib6_walk_continue(struct fib6_walke
 	}
 }
 
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
 {
 	int res;
 
@@ -1049,9 +1316,9 @@ static int fib6_clean_node(struct fib6_w
  *	ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-		     int (*func)(struct rt6_info *, void *arg),
-		     int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+			    int (*func)(struct rt6_info *, void *arg),
+			    int prune, void *arg)
 {
 	struct fib6_cleaner_t c;
 
@@ -1064,6 +1331,25 @@ void fib6_clean_tree(struct fib6_node *r
 	fib6_walk(&c.w);
 }
 
+void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+		    int prune, void *arg)
+{
+	struct fib6_table *table;
+	struct hlist_node *node;
+	unsigned int h;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
+					 tb6_hlist) {
+			write_lock_bh(&table->tb6_lock);
+			fib6_clean_tree(&table->tb6_root, func, prune, arg);
+			write_unlock_bh(&table->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+}
+
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 {
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -1142,11 +1428,8 @@ void fib6_run_gc(unsigned long dummy)
 	}
 	gc_args.more = 0;
 
-
-	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@@ -1165,6 +1448,8 @@ void __init fib6_init(void)
 					   NULL, NULL);
 	if (!fib6_node_kmem)
 		panic("cannot create fib6_nodes cache");
+
+	fib6_tables_init();
 }
 
 void fib6_gc_cleanup(void)
diff -puN net/ipv6/ip6_output.c~git-net net/ipv6/ip6_output.c
--- a/net/ipv6/ip6_output.c~git-net
+++ a/net/ipv6/ip6_output.c
@@ -866,7 +866,7 @@ static inline int ip6_ufo_append_data(st
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
diff -puN net/ipv6/mcast.c~git-net net/ipv6/mcast.c
--- a/net/ipv6/mcast.c~git-net
+++ a/net/ipv6/mcast.c
@@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock 
 
 #define IPV6_MLD_MAX_MSF	64
 
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
 
 /*
  *	socket join on multicast group
diff -puN net/ipv6/ndisc.c~git-net net/ipv6/ndisc.c
--- a/net/ipv6/ndisc.c~git-net
+++ a/net/ipv6/ndisc.c
@@ -62,6 +62,7 @@
 #include <linux/sysctl.h>
 #endif
 
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -419,6 +420,7 @@ static inline void ndisc_flow_init(struc
 	fl->proto	 	= IPPROTO_ICMPV6;
 	fl->fl_icmp_type	= type;
 	fl->fl_icmp_code	= 0;
+	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
diff -puN net/ipv6/netfilter.c~git-net net/ipv6/netfilter.c
--- a/net/ipv6/netfilter.c~git-net
+++ a/net/ipv6/netfilter.c
@@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_b
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
 			break;
 		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
diff -puN net/ipv6/netfilter/ip6_queue.c~git-net net/ipv6/netfilter/ip6_queue.c
--- a/net/ipv6/netfilter/ip6_queue.c~git-net
+++ a/net/ipv6/netfilter/ip6_queue.c
@@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queu
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff -puN net/ipv6/netfilter/ip6t_REJECT.c~git-net net/ipv6/netfilter/ip6t_REJECT.c
--- a/net/ipv6/netfilter/ip6t_REJECT.c~git-net
+++ a/net/ipv6/netfilter/ip6t_REJECT.c
@@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *o
 	ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
 	fl.fl_ip_sport = otcph.dest;
 	fl.fl_ip_dport = otcph.source;
+	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(NULL, &fl);
 	if (dst == NULL)
 		return;
diff -puN net/ipv6/netfilter/nf_conntrack_reasm.c~git-net net/ipv6/netfilter/nf_conntrack_reasm.c
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c~git-net
+++ a/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_c
  		return -1;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw,
 						  (u8*)(fhdr + 1) - skb->nh.raw,
@@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_que
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
@@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_que
 	head->nh.ipv6h->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	fq->fragments = NULL;
diff -puN net/ipv6/raw.c~git-net net/ipv6/raw.c
--- a/net/ipv6/raw.c~git-net
+++ a/net/ipv6/raw.c
@@ -334,7 +334,7 @@ int rawv6_rcv(struct sock *sk, struct sk
 	if (!rp->checksum)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		skb_postpull_rcsum(skb, skb->nh.raw,
 		                   skb->h.raw - skb->nh.raw);
 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *i
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
+	security_sk_classify_flow(sk, &fl);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff -puN net/ipv6/reassembly.c~git-net net/ipv6/reassembly.c
--- a/net/ipv6/reassembly.c~git-net
+++ a/net/ipv6/reassembly.c
@@ -53,10 +53,10 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
+int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct ip6frag_skb_cb
 {
@@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, s
 }
 
 static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ip6_frag_secret_rebuild(unsigned long dummy)
 {
@@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_q
  		return;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
 
@@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_qu
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip6_frag_mem);
@@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_qu
 	*skb_in = head;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
diff -puN net/ipv6/route.c~git-net net/ipv6/route.c
--- a/net/ipv6/route.c~git-net
+++ a/net/ipv6/route.c
@@ -35,7 +35,6 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
-#include <linux/netlink.h>
 #include <linux/if_arp.h>
 
 #ifdef 	CONFIG_PROC_FS
@@ -54,6 +53,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>
 
@@ -140,15 +140,49 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
-/* Protects all the ip6 fib */
+struct rt6_info ip6_prohibit_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EACCES,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_prohibit_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
-DEFINE_RWLOCK(rt6_lock);
+struct rt6_info ip6_blk_hole_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EINVAL,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
+#endif
 
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
@@ -188,8 +222,14 @@ static __inline__ int rt6_check_expired(
 		time_after(jiffies, rt->rt6i_expires));
 }
 
+static inline int rt6_need_strict(struct in6_addr *daddr)
+{
+	return (ipv6_addr_type(daddr) &
+		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+}
+
 /*
- *	Route lookup. Any rt6_lock is implied.
+ *	Route lookup. Any table->tb6_lock is implied.
  */
 
 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -441,27 +481,66 @@ int rt6_route_rcv(struct net_device *dev
 }
 #endif
 
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
-			    int oif, int strict)
+#define BACKTRACK() \
+if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
+	while ((fn = fn->parent) != NULL) { \
+		if (fn->fn_flags & RTN_TL_ROOT) { \
+			dst_hold(&rt->u.dst); \
+			goto out; \
+		} \
+		if (fn->fn_flags & RTN_RTINFO) \
+			goto restart; \
+	} \
+}
+
+static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
-	rt = rt6_device_match(fn->leaf, oif, strict);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+restart:
+	rt = fn->leaf;
+	rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
+	BACKTRACK();
 	dst_hold(&rt->u.dst);
-	rt->u.dst.__use++;
-	read_unlock_bh(&rt6_lock);
+out:
+	read_unlock_bh(&table->tb6_lock);
 
 	rt->u.dst.lastuse = jiffies;
-	if (rt->u.dst.error == 0)
-		return rt;
-	dst_release(&rt->u.dst);
+	rt->u.dst.__use++;
+
+	return rt;
+
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
+			    int oif, int strict)
+{
+	struct flowi fl = {
+		.oif = oif,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *daddr,
+				/* TODO: saddr */
+			},
+		},
+	};
+	struct dst_entry *dst;
+	int flags = strict ? RT6_F_STRICT : 0;
+
+	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
+	if (dst->error == 0)
+		return (struct rt6_info *) dst;
+
+	dst_release(dst);
+
 	return NULL;
 }
 
-/* ip6_ins_rt is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
@@ -471,10 +550,12 @@ int ip6_ins_rt(struct rt6_info *rt, stru
 		void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
-	write_unlock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+	err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -532,51 +613,40 @@ static struct rt6_info *rt6_alloc_clone(
 	return rt;
 }
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
-       while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_ROOT) { \
-			goto out; \
-		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
-	} \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
 	int reachable = RT6_SELECT_F_REACHABLE;
 
-	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	if (flags & RT6_F_STRICT)
+		strict = RT6_SELECT_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-			 &skb->nh.ipv6h->saddr);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
+	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
 	BACKTRACK();
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 	else {
 #if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 #else
 		goto out2;
 #endif
@@ -587,7 +657,7 @@ restart:
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
 		if (!err)
 			goto out2;
 	}
@@ -596,7 +666,7 @@ restart:
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -608,30 +678,54 @@ out:
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	skb->dst = (struct dst_entry *) rt;
-	return;
+
+	return rt;
 }
 
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+void ip6_route_input(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct flowi fl = {
+		.iif = skb->dev->ifindex,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = iph->saddr,
+				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+			},
+		},
+		.proto = iph->nexthdr,
+	};
+	int flags = 0;
+
+	if (rt6_need_strict(&iph->daddr))
+		flags |= RT6_F_STRICT;
+
+	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
 	int reachable = RT6_SELECT_F_REACHABLE;
 
-	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	if (flags & RT6_F_STRICT)
+		strict = RT6_SELECT_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
@@ -641,7 +735,7 @@ restart:
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -667,7 +761,7 @@ restart:
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -679,11 +773,21 @@ out:
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	return &rt->u.dst;
+	return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+	int flags = 0;
+
+	if (rt6_need_strict(&fl->fl6_dst))
+		flags |= RT6_F_STRICT;
+
+	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
 
@@ -747,8 +851,6 @@ static void ip6_rt_update_pmtu(struct ds
 	}
 }
 
-/* Protected by rt6_lock.  */
-static struct dst_entry *ndisc_dst_gc_list;
 static int ipv6_get_mtu(struct net_device *dev);
 
 static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -769,6 +871,9 @@ static inline unsigned int ipv6_advmss(u
 	return mtu;
 }
 
+static struct dst_entry *ndisc_dst_gc_list;
+static DEFINE_SPINLOCK(ndisc_lock);
+
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
 				  struct neighbour *neigh,
 				  struct in6_addr *addr,
@@ -809,10 +914,10 @@ struct dst_entry *ndisc_dst_alloc(struct
 	rt->rt6i_dst.plen = 128;
 #endif
 
-	write_lock_bh(&rt6_lock);
+	spin_lock_bh(&ndisc_lock);
 	rt->u.dst.next = ndisc_dst_gc_list;
 	ndisc_dst_gc_list = &rt->u.dst;
-	write_unlock_bh(&rt6_lock);
+	spin_unlock_bh(&ndisc_lock);
 
 	fib6_force_start_gc();
 
@@ -826,8 +931,11 @@ int ndisc_dst_gc(int *more)
 	int freed;
 
 	next = NULL;
+ 	freed = 0;
+
+	spin_lock_bh(&ndisc_lock);
 	pprev = &ndisc_dst_gc_list;
-	freed = 0;
+
 	while ((dst = *pprev) != NULL) {
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
@@ -839,6 +947,8 @@ int ndisc_dst_gc(int *more)
 		}
 	}
 
+	spin_unlock_bh(&ndisc_lock);
+
 	return freed;
 }
 
@@ -900,7 +1010,8 @@ int ipv6_get_hoplimit(struct net_device 
  */
 
 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-		void *_rtattr, struct netlink_skb_parms *req)
+		  void *_rtattr, struct netlink_skb_parms *req,
+		  u32 table_id)
 {
 	int err;
 	struct rtmsg *r;
@@ -908,6 +1019,7 @@ int ip6_route_add(struct in6_rtmsg *rtms
 	struct rt6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
+	struct fib6_table *table;
 	int addr_type;
 
 	rta = (struct rtattr **) _rtattr;
@@ -931,6 +1043,12 @@ int ip6_route_add(struct in6_rtmsg *rtms
 	if (rtmsg->rtmsg_metric == 0)
 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
 
+	table = fib6_new_table(table_id);
+	if (table == NULL) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
 	rt = ip6_dst_alloc();
 
 	if (rt == NULL) {
@@ -1087,6 +1205,7 @@ install_route:
 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
+	rt->rt6i_table = table;
 	return ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
@@ -1102,26 +1221,38 @@ out:
 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
+	if (rt == &ip6_null_entry)
+		return -ENOENT;
+
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
 
 	err = fib6_del(rt, nlh, _rtattr, req);
 	dst_release(&rt->u.dst);
 
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
+			 void *_rtattr, struct netlink_skb_parms *req,
+			 u32 table_id)
 {
+	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	read_lock_bh(&rt6_lock);
+	table = fib6_get_table(table_id);
+	if (table == NULL)
+		return err;
 
-	fn = fib6_locate(&ip6_routing_table,
+	read_lock_bh(&table->tb6_lock);
+
+	fn = fib6_locate(&table->tb6_root,
 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
 	
@@ -1138,12 +1269,12 @@ static int ip6_route_del(struct in6_rtms
 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
 				continue;
 			dst_hold(&rt->u.dst);
-			read_unlock_bh(&rt6_lock);
+			read_unlock_bh(&table->tb6_lock);
 
 			return ip6_del_rt(rt, nlh, _rtattr, req);
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -1155,10 +1286,15 @@ void rt6_redirect(struct in6_addr *dest,
 		  struct neighbour *neigh, u8 *lladdr, int on_link)
 {
 	struct rt6_info *rt, *nrt = NULL;
-	int strict;
 	struct fib6_node *fn;
+	struct fib6_table *table;
 	struct netevent_redirect netevent;
 
+	/* TODO: Very lazy, might need to check all tables */
+	table = fib6_get_table(RT6_TABLE_MAIN);
+	if (table == NULL)
+		return;
+
 	/*
 	 * Get the "current" route for this destination and
 	 * check if the redirect has come from approriate router.
@@ -1169,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest,
 	 * is a bit fuzzy and one might need to check all possible
 	 * routes.
 	 */
-	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, dest, NULL);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
@@ -1195,7 +1330,7 @@ restart:
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	else if (strict) {
+	else if (rt6_need_strict(dest)) {
 		while ((fn = fn->parent) != NULL) {
 			if (fn->fn_flags & RTN_ROOT)
 				break;
@@ -1203,7 +1338,7 @@ restart:
 				goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt) {
 		if (net_ratelimit())
@@ -1378,6 +1513,7 @@ static struct rt6_info * ip6_rt_copy(str
 #ifdef CONFIG_IPV6_SUBTREES
 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+		rt->rt6i_table = ort->rt6i_table;
 	}
 	return rt;
 }
@@ -1388,9 +1524,14 @@ static struct rt6_info *rt6_get_route_in
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+	table = fib6_get_table(RT6_TABLE_INFO);
+	if (table == NULL)
+		return NULL;
+
+	write_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 	if (!fn)
 		goto out;
 
@@ -1405,7 +1546,7 @@ static struct rt6_info *rt6_get_route_in
 		break;
 	}
 out:
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1427,7 +1568,7 @@ static struct rt6_info *rt6_add_route_in
 		rtmsg.rtmsg_flags |= RTF_DEFAULT;
 	rtmsg.rtmsg_ifindex = ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
 
 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1436,12 +1577,14 @@ static struct rt6_info *rt6_add_route_in
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {	
 	struct rt6_info *rt;
-	struct fib6_node *fn;
+	struct fib6_table *table;
 
-	fn = &ip6_routing_table;
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	for (rt = fn->leaf; rt; rt=rt->u.next) {
+	write_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1449,7 +1592,7 @@ struct rt6_info *rt6_get_dflt_router(str
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1468,28 +1611,31 @@ struct rt6_info *rt6_add_dflt_router(str
 
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
 void rt6_purge_dflt_routers(void)
 {
 	struct rt6_info *rt;
+	struct fib6_table *table;
+
+	/* NOTE: Keep consistent with rt6_get_dflt_router */
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return;
 
 restart:
-	read_lock_bh(&rt6_lock);
-	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 			dst_hold(&rt->u.dst);
-
-			read_unlock_bh(&rt6_lock);
-
+			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt, NULL, NULL, NULL);
-
 			goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 }
 
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
@@ -1510,10 +1656,12 @@ int ipv6_route_ioctl(unsigned int cmd, v
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
+					    RT6_TABLE_MAIN);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
+					    RT6_TABLE_MAIN);
 			break;
 		default:
 			err = -EINVAL;
@@ -1583,6 +1731,7 @@ struct rt6_info *addrconf_dst_alloc(stru
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
+	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
 	atomic_set(&rt->u.dst.__refcnt, 1);
 
@@ -1601,9 +1750,7 @@ static int fib6_ifdown(struct rt6_info *
 
 void rt6_ifdown(struct net_device *dev)
 {
-	write_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_ifdown, 0, dev);
 }
 
 struct rt6_mtu_change_arg
@@ -1653,13 +1800,12 @@ static int rt6_mtu_change_route(struct r
 
 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 {
-	struct rt6_mtu_change_arg arg;
+	struct rt6_mtu_change_arg arg = {
+		.dev = dev,
+		.mtu = mtu,
+	};
 
-	arg.dev = dev;
-	arg.mtu = mtu;
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 }
 
 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
@@ -1709,7 +1855,8 @@ int inet6_rtm_delroute(struct sk_buff *s
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+			     rtm_get_table(arg, r->rtm_table));
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1719,15 +1866,10 @@ int inet6_rtm_newroute(struct sk_buff *s
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+			     rtm_get_table(arg, r->rtm_table));
 }
 
-struct rt6_rtnl_dump_arg
-{
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-};
-
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
@@ -1737,6 +1879,7 @@ static int rt6_fill_node(struct sk_buff 
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 	struct rta_cacheinfo ci;
+	u32 table;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1751,7 +1894,12 @@ static int rt6_fill_node(struct sk_buff 
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
-	rtm->rtm_table = RT_TABLE_MAIN;
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	RTA_PUT_U32(skb, RTA_TABLE, table);
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1818,7 +1966,7 @@ rtattr_failure:
 	return -1;
 }
 
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
@@ -1834,96 +1982,6 @@ static int rt6_dump_route(struct rt6_inf
 		     prefix, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
-{
-	int res;
-	struct rt6_info *rt;
-
-	for (rt = w->leaf; rt; rt = rt->u.next) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
-			/* Frame is full, suspend walking */
-			w->leaf = rt;
-			return 1;
-		}
-		BUG_TRAP(res!=0);
-	}
-	w->leaf = NULL;
-	return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-	struct fib6_walker_t *w = (void*)cb->args[0];
-
-	if (w) {
-		cb->args[0] = 0;
-		fib6_walker_unlink(w);
-		kfree(w);
-	}
-	cb->done = (void*)cb->args[1];
-	cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-	fib6_dump_end(cb);
-	return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct rt6_rtnl_dump_arg arg;
-	struct fib6_walker_t *w;
-	int res;
-
-	arg.skb = skb;
-	arg.cb = cb;
-
-	w = (void*)cb->args[0];
-	if (w == NULL) {
-		/* New dump:
-		 * 
-		 * 1. hook callback destructor.
-		 */
-		cb->args[1] = (long)cb->done;
-		cb->done = fib6_dump_done;
-
-		/*
-		 * 2. allocate and initialize walker.
-		 */
-		w = kzalloc(sizeof(*w), GFP_ATOMIC);
-		if (w == NULL)
-			return -ENOMEM;
-		RT6_TRACE("dump<%p", w);
-		w->root = &ip6_routing_table;
-		w->func = fib6_dump_node;
-		w->args = &arg;
-		cb->args[0] = (long)w;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk(w);
-		read_unlock_bh(&rt6_lock);
-	} else {
-		w->args = &arg;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk_continue(w);
-		read_unlock_bh(&rt6_lock);
-	}
-#if RT6_DEBUG >= 3
-	if (res <= 0 && skb->len == 0)
-		RT6_TRACE("%p>dump end\n", w);
-#endif
-	res = res < 0 ? res : skb->len;
-	/* res < 0 is an error. (really, impossible)
-	   res == 0 means that dump is complete, but skb still can contain data.
-	   res > 0 dump is not complete, but frame is full.
-	 */
-	/* Destroy walker, if dump of this table is complete. */
-	if (res <= 0)
-		fib6_dump_end(cb);
-	return res;
-}
-
 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
@@ -1982,9 +2040,7 @@ int inet6_rtm_getroute(struct sk_buff *i
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 out:
 	return err;
 out_free:
@@ -1996,27 +2052,25 @@ void inet6_rt_notify(int event, struct r
 			struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-	u32 pid = current->pid;
-	u32 seq = 0;
-
-	if (req)
-		pid = req->pid;
-	if (nlh)
-		seq = nlh->nlmsg_seq;
-	
-	skb = alloc_skb(size, gfp_any());
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
-		return;
-	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
+	u32 pid = req ? req->pid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int payload = sizeof(struct rtmsg) + 256;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+	if (skb == NULL)
+		goto errout;
+
+	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
 }
 
 /*
@@ -2092,16 +2146,13 @@ static int rt6_info_route(struct rt6_inf
 
 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
 {
-	struct rt6_proc_arg arg;
-	arg.buffer = buffer;
-	arg.offset = offset;
-	arg.length = length;
-	arg.skip = 0;
-	arg.len = 0;
-
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	struct rt6_proc_arg arg = {
+		.buffer = buffer,
+		.offset = offset,
+		.length = length,
+	};
+
+	fib6_clean_all(rt6_info_route, 0, &arg);
 
 	*start = buffer;
 	if (offset)
@@ -2274,10 +2325,16 @@ void __init ip6_route_init(void)
 #ifdef CONFIG_XFRM
 	xfrm6_init();
 #endif
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_init();
+#endif
 }
 
 void ip6_route_cleanup(void)
 {
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_cleanup();
+#endif
 #ifdef CONFIG_PROC_FS
 	proc_net_remove("ipv6_route");
 	proc_net_remove("rt6_stats");
diff -puN net/ipv6/tcp_ipv6.c~git-net net/ipv6/tcp_ipv6.c
--- a/net/ipv6/tcp_ipv6.c~git-net
+++ a/net/ipv6/tcp_ipv6.c
@@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *s
 		final_p = &final;
 	}
 
+	security_sk_classify_flow(sk, &fl);
+
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto failure;
@@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *s
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_skb_classify_flow(skb, &fl);
 
 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
 				sk->sk_err_soft = -err;
@@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct soc
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -541,7 +545,7 @@ static void tcp_v6_send_check(struct soc
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -566,7 +570,7 @@ static int tcp_v6_gso_send_check(struct 
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
 				     IPPROTO_TCP, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
@@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_bu
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
@@ -820,6 +826,8 @@ static int tcp_v6_conn_request(struct so
 
 	tcp_rsk(req)->snt_isn = isn;
 
+	security_inet_conn_request(sk, skb, req);
+
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
 
@@ -923,6 +931,7 @@ static struct sock * tcp_v6_syn_recv_soc
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
@@ -1024,7 +1033,7 @@ out:
 
 static int tcp_v6_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
 				  &skb->nh.ipv6h->daddr,skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
diff -puN net/ipv6/udp.c~git-net net/ipv6/udp.c
--- a/net/ipv6/udp.c~git-net
+++ a/net/ipv6/udp.c
@@ -345,6 +345,8 @@ out:
 
 static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
+	int rc;
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
 		kfree_skb(skb);
 		return -1;
@@ -356,7 +358,10 @@ static inline int udpv6_queue_rcv_skb(st
 		return 0;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return 0;
@@ -475,7 +480,7 @@ static int udpv6_rcv(struct sk_buff **ps
 		uh = skb->h.uh;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
@@ -782,6 +787,8 @@ do_udp_sendmsg:
 		connected = 0;
 	}
 
+	security_sk_classify_flow(sk, fl);
+
 	err = ip6_sk_dst_lookup(sk, &dst, fl);
 	if (err)
 		goto out;
@@ -855,6 +862,16 @@ out:
 		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
diff -puN net/ipv6/xfrm6_output.c~git-net net/ipv6/xfrm6_output.c
--- a/net/ipv6/xfrm6_output.c~git-net
+++ a/net/ipv6/xfrm6_output.c
@@ -41,8 +41,8 @@ static int xfrm6_output_one(struct sk_bu
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
diff -puN net/ipx/af_ipx.c~git-net net/ipx/af_ipx.c
--- a/net/ipx/af_ipx.c~git-net
+++ a/net/ipx/af_ipx.c
@@ -1642,13 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
 		goto out;
 
-	ipx		= ipx_hdr(skb);
-	ipx_pktsize	= ntohs(ipx->ipx_pktsize);
+	if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
+		goto drop;
+
+	ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize);
 	
 	/* Too small or invalid header? */
-	if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len)
+	if (ipx_pktsize < sizeof(struct ipxhdr) ||
+	    !pskb_may_pull(skb, ipx_pktsize))
 		goto drop;
                         
+	ipx = ipx_hdr(skb);
 	if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
 	   ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
 		goto drop;
diff -puN net/key/af_key.c~git-net net/key/af_key.c
--- a/net/key/af_key.c~git-net
+++ a/net/key/af_key.c
@@ -2708,6 +2708,9 @@ static int pfkey_send_acquire(struct xfr
 #endif
 	int sockaddr_size;
 	int size;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
+	int ctx_size = 0;
 	
 	sockaddr_size = pfkey_sockaddr_size(x->props.family);
 	if (!sockaddr_size)
@@ -2723,6 +2726,11 @@ static int pfkey_send_acquire(struct xfr
 	else if (x->id.proto == IPPROTO_ESP)
 		size += count_esp_combs(t);
 
+	if ((xfrm_ctx = x->security)) {
+		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+		size +=  sizeof(struct sadb_x_sec_ctx) + ctx_size;
+	}
+
 	skb =  alloc_skb(size + 16, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
@@ -2818,17 +2826,31 @@ static int pfkey_send_acquire(struct xfr
 	else if (x->id.proto == IPPROTO_ESP)
 		dump_esp_combs(skb, t);
 
+	/* security context */
+	if (xfrm_ctx) {
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx->sadb_x_sec_len =
+		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
 }
 
-static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
                                                 u8 *data, int len, int *dir)
 {
 	struct xfrm_policy *xp;
 	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
 	struct sadb_x_sec_ctx *sec_ctx;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_IPSEC_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -2869,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile
 	xp->lft.hard_byte_limit = XFRM_INF;
 	xp->lft.soft_packet_limit = XFRM_INF;
 	xp->lft.hard_packet_limit = XFRM_INF;
-	xp->family = family;
+	xp->family = sk->sk_family;
 
 	xp->xfrm_nr = 0;
 	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
@@ -2885,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len)
+		    sec_ctx->sadb_x_sec_len) {
+			*dir = -EINVAL;
 			goto out;
+		}
 		if ((*dir = verify_sec_ctx_len(p)))
 			goto out;
 		uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
@@ -2896,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile
 		if (*dir)
 			goto out;
 	}
+	else {
+		*dir = security_xfrm_sock_policy_alloc(xp, sk);
+		if (*dir)
+			goto out;
+	}
 
 	*dir = pol->sadb_x_policy_dir-1;
 	return xp;
diff -puN net/llc/llc_sap.c~git-net net/llc/llc_sap.c
--- a/net/llc/llc_sap.c~git-net
+++ a/net/llc/llc_sap.c
@@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap
 		if (llc->laddr.lsap != laddr->lsap)
 			continue;
 
+		if (llc->dev != skb->dev)
+			continue;
+
 		skb1 = skb_clone(skb, GFP_ATOMIC);
 		if (!skb1)
 			break;
diff -puN net/netfilter/core.c~git-net net/netfilter/core.c
--- a/net/netfilter/core.c~git-net
+++ a/net/netfilter/core.c
@@ -182,7 +182,7 @@ next_hook:
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
+		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 	}
@@ -222,6 +222,28 @@ copy_skb:
 }
 EXPORT_SYMBOL(skb_make_writable);
 
+u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
+{
+	u_int32_t diff[] = { oldval, newval };
+
+	return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
+}
+EXPORT_SYMBOL(nf_csum_update);
+
+u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+			       u_int32_t oldval, u_int32_t newval,
+			       u_int16_t csum, int pseudohdr)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		csum = nf_csum_update(oldval, newval, csum);
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = nf_csum_update(oldval, newval, skb->csum);
+	} else if (pseudohdr)
+		csum = ~nf_csum_update(oldval, newval, ~csum);
+
+	return csum;
+}
+EXPORT_SYMBOL(nf_proto_csum_update);
 
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
diff -puN net/netfilter/nf_conntrack_proto_tcp.c~git-net net/netfilter/nf_conntrack_proto_tcp.c
--- a/net/netfilter/nf_conntrack_proto_tcp.c~git-net
+++ a/net/netfilter/nf_conntrack_proto_tcp.c
@@ -823,8 +823,7 @@ static int tcp_error(struct sk_buff *skb
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
diff -puN net/netfilter/nf_conntrack_proto_udp.c~git-net net/netfilter/nf_conntrack_proto_udp.c
--- a/net/netfilter/nf_conntrack_proto_udp.c~git-net
+++ a/net/netfilter/nf_conntrack_proto_udp.c
@@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
 	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
diff -puN net/netfilter/nf_internals.h~git-net net/netfilter/nf_internals.h
--- a/net/netfilter/nf_internals.h~git-net
+++ a/net/netfilter/nf_internals.h
@@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct li
 				int hook_thresh);
 
 /* nf_queue.c */
-extern int nf_queue(struct sk_buff **skb, 
+extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem, 
 		    int pf, unsigned int hook,
 		    struct net_device *indev,
diff -puN net/netfilter/nf_queue.c~git-net net/netfilter/nf_queue.c
--- a/net/netfilter/nf_queue.c~git-net
+++ a/net/netfilter/nf_queue.c
@@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_ha
  * Any packet that leaves via this function must come back 
  * through nf_reinject().
  */
-int nf_queue(struct sk_buff **skb, 
-	     struct list_head *elem, 
-	     int pf, unsigned int hook,
-	     struct net_device *indev,
-	     struct net_device *outdev,
-	     int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb,
+		      struct list_head *elem,
+		      int pf, unsigned int hook,
+		      struct net_device *indev,
+		      struct net_device *outdev,
+		      int (*okfn)(struct sk_buff *),
+		      unsigned int queuenum)
 {
 	int status;
 	struct nf_info *info;
@@ -94,14 +94,14 @@ int nf_queue(struct sk_buff **skb, 
 	read_lock(&queue_handler_lock);
 	if (!queue_handler[pf]) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -109,9 +109,9 @@ int nf_queue(struct sk_buff **skb, 
 	if (!info) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
-			       *skb);
+			       skb);
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -130,15 +130,15 @@ int nf_queue(struct sk_buff **skb, 
 	if (outdev) dev_hold(outdev);
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-	if ((*skb)->nf_bridge) {
-		physindev = (*skb)->nf_bridge->physindev;
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
 		if (physindev) dev_hold(physindev);
-		physoutdev = (*skb)->nf_bridge->physoutdev;
+		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev) dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(*skb, info);
-	status = queue_handler[pf]->outfn(*skb, info, queuenum,
+	afinfo->saveroute(skb, info);
+	status = queue_handler[pf]->outfn(skb, info, queuenum,
 					  queue_handler[pf]->data);
 
 	read_unlock(&queue_handler_lock);
@@ -153,7 +153,7 @@ int nf_queue(struct sk_buff **skb, 
 #endif
 		module_put(info->elem->owner);
 		kfree(info);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 
 		return 1;
 	}
@@ -161,6 +161,46 @@ int nf_queue(struct sk_buff **skb, 
 	return 1;
 }
 
+int nf_queue(struct sk_buff *skb,
+	     struct list_head *elem,
+	     int pf, unsigned int hook,
+	     struct net_device *indev,
+	     struct net_device *outdev,
+	     int (*okfn)(struct sk_buff *),
+	     unsigned int queuenum)
+{
+	struct sk_buff *segs;
+
+	if (!skb_is_gso(skb))
+		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+				  queuenum);
+
+	switch (pf) {
+	case AF_INET:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case AF_INET6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	segs = skb_gso_segment(skb, 0);
+	kfree_skb(skb);
+	if (unlikely(IS_ERR(segs)))
+		return 1;
+
+	do {
+		struct sk_buff *nskb = segs->next;
+
+		segs->next = NULL;
+		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+				queuenum))
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+	return 1;
+}
+
 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
@@ -224,9 +264,9 @@ void nf_reinject(struct sk_buff *skb, st
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!nf_queue(&skb, elem, info->pf, info->hook, 
-			      info->indev, info->outdev, info->okfn,
-			      verdict >> NF_VERDICT_BITS))
+		if (!__nf_queue(skb, elem, info->pf, info->hook,
+				info->indev, info->outdev, info->okfn,
+				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
 	default:
diff -puN net/netfilter/nfnetlink_queue.c~git-net net/netfilter/nfnetlink_queue.c
--- a/net/netfilter/nfnetlink_queue.c~git-net
+++ a/net/netfilter/nfnetlink_queue.c
@@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_
 		break;
 	
 	case NFQNL_COPY_PACKET:
-		if (entskb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entskb,
-		                               outdev == NULL))) {
+		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entskb))) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
diff -puN net/netfilter/xt_string.c~git-net net/netfilter/xt_string.c
--- a/net/netfilter/xt_string.c~git-net
+++ a/net/netfilter/xt_string.c
@@ -37,7 +37,7 @@ static int match(const struct sk_buff *s
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset, 
 			     conf->to_offset, conf->config, &state) 
-			     != UINT_MAX) && !conf->invert;
+			     != UINT_MAX) ^ conf->invert;
 }
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
diff -puN /dev/null net/netlabel/Kconfig
--- /dev/null
+++ a/net/netlabel/Kconfig
@@ -0,0 +1,14 @@
+#
+# NetLabel configuration
+#
+
+config NETLABEL
+	bool "NetLabel subsystem support"
+	depends on NET && SECURITY
+	default n
+	---help---
+	  NetLabel provides support for explicit network packet labeling
+	  protocols such as CIPSO and RIPSO.  For more information see
+	  Documentation/netlabel.
+
+	  If you are unsure, say N.
diff -puN /dev/null net/netlabel/Makefile
--- /dev/null
+++ a/net/netlabel/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the NetLabel subsystem.
+#
+# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
+#
+
+# base objects
+obj-y	:= netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+
+# management objects
+obj-y	+= netlabel_mgmt.o
+
+# protocol modules
+obj-y	+= netlabel_unlabeled.o
+obj-y	+= netlabel_cipso_v4.o
+
diff -puN /dev/null net/netlabel/netlabel_cipso_v4.c
--- /dev/null
+++ a/net/netlabel/netlabel_cipso_v4.c
@@ -0,0 +1,542 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_user.h"
+#include "netlabel_cipso_v4.h"
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_cipsov4_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *ptr;
+
+	ptr = container_of(entry, struct cipso_v4_doi, rcu);
+	switch (ptr->type) {
+	case CIPSO_V4_MAP_STD:
+		kfree(ptr->map.std->lvl.cipso);
+		kfree(ptr->map.std->lvl.local);
+		kfree(ptr->map.std->cat.cipso);
+		kfree(ptr->map.std->cat.local);
+		break;
+	}
+	kfree(ptr);
+}
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	u32 num_lvls;
+	u32 num_cats;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+	u32 tmp_val_a;
+	u32 tmp_val_b;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_std_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_std_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
+	if (doi_def->map.std == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->type = CIPSO_V4_MAP_STD;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_std_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_std_failure;
+		}
+	}
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	if (msg_len < 6 * NETLBL_LEN_U32)
+		goto add_std_failure;
+
+	num_lvls = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_lvls == 0)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->lvl.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
+	if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->lvl.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	num_cats = netlbl_getinc_u32(&msg, &msg_len);
+	doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->cat.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len);
+	if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->cat.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	if (msg_len <
+	    num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
+	    num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
+		goto add_std_failure;
+
+	for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
+		doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
+		doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
+		doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
+	for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
+		doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
+
+	for (iter = 0; iter < num_lvls; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
+		    tmp_val_b >= doi_def->map.std->lvl.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
+	}
+
+	for (iter = 0; iter < num_cats; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->cat.local_size ||
+		    tmp_val_b >= doi_def->map.std->cat.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b;
+	}
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_std_failure;
+	return 0;
+
+add_std_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_pass(u32 doi,
+				   struct nlattr *msg,
+				   size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_pass_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_pass_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_pass_failure;
+	}
+	doi_def->type = CIPSO_V4_MAP_PASS;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_pass_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_pass_failure;
+		}
+	}
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_pass_failure;
+	return 0;
+
+add_pass_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new DOI definition based on the given ADD message and add it to the
+ * CIPSO V4 engine.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
+
+{
+	int ret_val = -EINVAL;
+	u32 doi;
+	u32 map_type;
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_return;
+
+	if (msg_len < 2 * NETLBL_LEN_U32)
+		goto add_return;
+
+	doi = netlbl_getinc_u32(&msg, &msg_len);
+	map_type = netlbl_getinc_u32(&msg, &msg_len);
+	switch (map_type) {
+	case CIPSO_V4_MAP_STD:
+		ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len);
+		break;
+	case CIPSO_V4_MAP_PASS:
+		ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len);
+		break;
+	}
+
+add_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond accordingly.  Returns
+ * zero on success and negative values on error.
+ *
+ */
+static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+	struct sk_buff *ans_skb;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
+		goto list_failure;
+
+	doi = nla_get_u32(msg);
+	ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto list_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LIST);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall - Handle a LISTALL message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LISTALL message and respond accordingly.  Returns
+ * zero on success and negative values on error.
+ *
+ */
+static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct sk_buff *ans_skb;
+
+	ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto listall_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LISTALL);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto listall_failure;
+
+	return 0;
+
+listall_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto remove_return;
+	}
+
+	doi = nla_get_u32(msg);
+	ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_cipsov4_genl_c_add = {
+	.cmd = NLBL_CIPSOV4_C_ADD,
+	.flags = 0,
+	.doit = netlbl_cipsov4_add,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_remove = {
+	.cmd = NLBL_CIPSOV4_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_cipsov4_remove,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_list = {
+	.cmd = NLBL_CIPSOV4_C_LIST,
+	.flags = 0,
+	.doit = netlbl_cipsov4_list,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_listall = {
+	.cmd = NLBL_CIPSOV4_C_LISTALL,
+	.flags = 0,
+	.doit = netlbl_cipsov4_listall,
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component
+ *
+ * Description:
+ * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_cipsov4_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_cipsov4_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_add);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_remove);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_listall);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
diff -puN /dev/null net/netlabel/netlabel_cipso_v4.h
--- /dev/null
+++ a/net/netlabel/netlabel_cipso_v4.h
@@ -0,0 +1,217 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_CIPSO_V4
+#define _NETLABEL_CIPSO_V4
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the CIPSO subsystem, all
+ * of which are preceeded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an applications message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a new DOI mapping table, after completion
+ *   of the task the kernel should ACK this message.
+ *
+ *   +---------------+--------------------+---------------------+
+ *   | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ...
+ *   +---------------+--------------------+---------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data
+ *   +-------------- ---- --- -- -
+ *
+ *     DOI:          the DOI value
+ *     map type:     the mapping table type (defined in the cipso_ipv4.h header
+ *                   as CIPSO_V4_MAP_*)
+ *     tag count:    the number of tags, must be greater than zero
+ *     tag:          the CIPSO tag for the DOI, tags listed first are given
+ *                   higher priorirty when sending packets
+ *     mapping data: specific to the map type (see below)
+ *
+ *   CIPSO_V4_MAP_STD
+ *
+ *   +------------------+-----------------------+----------------------+
+ *   | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
+ *   +------------------+-----------------------+----------------------+
+ *
+ *   +----------------------+---------------------+---------------------+
+ *   | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
+ *   +----------------------+---------------------+---------------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     levels:         the number of level mappings
+ *     max l level:    the highest local level
+ *     max r level:    the highest remote/CIPSO level
+ *     categories:     the number of category mappings
+ *     max l cat:      the highest local category
+ *     max r cat:      the highest remote/CIPSO category
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   CIPSO_V4_MAP_PASS
+ *
+ *   No mapping data is needed for this map type.
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a specific DOI mapping table from the
+ *   CIPSO V4 system.  The kernel should ACK this message.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ * o LIST:
+ *   Sent by an application to list the details of a DOI definition.  The
+ *   kernel should send an ACK on error or a response as indicated below.  The
+ *   application generated message format is shown below.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ *   The valid response message format depends on the type of the DOI mapping,
+ *   the known formats are shown below.
+ *
+ *   +--------------------+
+ *   | map type (32 bits) | ...
+ *   +--------------------+
+ *
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ *   (map type == CIPSO_V4_MAP_STD)
+ *
+ *   +----------------+------------------+----------------------+
+ *   | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
+ *   +----------------+------------------+----------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     levels:         the number of level mappings
+ *     categories:     the number of category mappings
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   (map type == CIPSO_V4_MAP_PASS)
+ *
+ *   +----------------+
+ *   | tags (32 bits) | ...
+ *   +----------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *
+ * o LISTALL:
+ *   This message is sent by an application to list the valid DOIs on the
+ *   system.  There is no payload and the kernel should respond with an ACK
+ *   or the following message.
+ *
+ *   +---------------------+------------------+-----------------------+
+ *   | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
+ *   +---------------------+------------------+-----------------------+
+ *
+ *   +-----------------------+
+ *   | map type #X (32 bits) | ...
+ *   +-----------------------+
+ *
+ *     DOI count:      the number of DOIs
+ *     DOI:            the DOI value
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ */
+
+/* NetLabel CIPSOv4 commands */
+enum {
+	NLBL_CIPSOV4_C_UNSPEC,
+	NLBL_CIPSOV4_C_ACK,
+	NLBL_CIPSOV4_C_ADD,
+	NLBL_CIPSOV4_C_REMOVE,
+	NLBL_CIPSOV4_C_LIST,
+	NLBL_CIPSOV4_C_LISTALL,
+	__NLBL_CIPSOV4_C_MAX,
+};
+#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_cipsov4_genl_init(void);
+
+#endif
diff -puN /dev/null net/netlabel/netlabel_domainhash.c
--- /dev/null
+++ a/net/netlabel/netlabel_domainhash.c
@@ -0,0 +1,513 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
+
+struct netlbl_domhsh_tbl {
+	struct list_head *tbl;
+	u32 size;
+};
+
+/* Domain hash table */
+/* XXX - updates should be so rare that having one spinlock for the entire
+ * hash table should be okay */
+static DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
+
+/* Default domain mapping */
+static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
+static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+
+/*
+ * Domain Hash Table Helper Functions
+ */
+
+/**
+ * netlbl_domhsh_free_entry - Frees a domain hash table entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a hash table entry can be released
+ * safely.
+ *
+ */
+static void netlbl_domhsh_free_entry(struct rcu_head *entry)
+{
+	struct netlbl_dom_map *ptr;
+
+	ptr = container_of(entry, struct netlbl_dom_map, rcu);
+	kfree(ptr->domain);
+	kfree(ptr);
+}
+
+/**
+ * netlbl_domhsh_hash - Hashing function for the domain hash table
+ * @domain: the domain name to hash
+ *
+ * Description:
+ * This is the hashing function for the domain hash table, it returns the
+ * correct bucket number for the domain.  The caller is responsibile for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+static u32 netlbl_domhsh_hash(const char *key)
+{
+	u32 iter;
+	u32 val;
+	u32 len;
+
+	/* This is taken (with slight modification) from
+	 * security/selinux/ss/symtab.c:symhash() */
+
+	for (iter = 0, val = 0, len = strlen(key); iter < len; iter++)
+		val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter];
+	return val & (rcu_dereference(netlbl_domhsh)->size - 1);
+}
+
+/**
+ * netlbl_domhsh_search - Search for a domain entry
+ * @domain: the domain
+ * @def: return default if no match is found
+ *
+ * Description:
+ * Searches the domain hash table and returns a pointer to the hash table
+ * entry if found, otherwise NULL is returned.  If @def is non-zero and a
+ * match is not found in the domain hash table the default mapping is returned
+ * if it exists.  The caller is responsibile for the rcu hash table locks
+ * (i.e. the caller much call rcu_read_[un]lock()).
+ *
+ */
+static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
+{
+	u32 bkt;
+	struct netlbl_dom_map *iter;
+
+	if (domain != NULL) {
+		bkt = netlbl_domhsh_hash(domain);
+		list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
+			if (iter->valid && strcmp(iter->domain, domain) == 0)
+				return iter;
+	}
+
+	if (def != 0) {
+		iter = rcu_dereference(netlbl_domhsh_def);
+		if (iter != NULL && iter->valid)
+			return iter;
+	}
+
+	return NULL;
+}
+
+/*
+ * Domain Hash Table Functions
+ */
+
+/**
+ * netlbl_domhsh_init - Init for the domain hash
+ * @size: the number of bits to use for the hash buckets
+ *
+ * Description:
+ * Initializes the domain hash table, should be called only by
+ * netlbl_user_init() during initialization.  Returns zero on success, non-zero
+ * values on error.
+ *
+ */
+int netlbl_domhsh_init(u32 size)
+{
+	u32 iter;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+
+	if (size == 0)
+		return -EINVAL;
+
+	hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
+	if (hsh_tbl == NULL)
+		return -ENOMEM;
+	hsh_tbl->size = 1 << size;
+	hsh_tbl->tbl = kcalloc(hsh_tbl->size,
+			       sizeof(struct list_head),
+			       GFP_KERNEL);
+	if (hsh_tbl->tbl == NULL) {
+		kfree(hsh_tbl);
+		return -ENOMEM;
+	}
+	for (iter = 0; iter < hsh_tbl->size; iter++)
+		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
+
+	rcu_read_lock();
+	spin_lock(&netlbl_domhsh_lock);
+	rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
+	spin_unlock(&netlbl_domhsh_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * netlbl_domhsh_add - Adds a entry to the domain hash table
+ * @entry: the entry to add
+ *
+ * Description:
+ * Adds a new entry to the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry)
+{
+	int ret_val;
+	u32 bkt;
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = 0;
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
+						  entry->domain);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret_val != 0)
+		return ret_val;
+
+	entry->valid = 1;
+	INIT_RCU_HEAD(&entry->rcu);
+
+	ret_val = 0;
+	rcu_read_lock();
+	if (entry->domain != NULL) {
+		bkt = netlbl_domhsh_hash(entry->domain);
+		spin_lock(&netlbl_domhsh_lock);
+		if (netlbl_domhsh_search(entry->domain, 0) == NULL)
+			list_add_tail_rcu(&entry->list,
+					  &netlbl_domhsh->tbl[bkt]);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else if (entry->domain == NULL) {
+		INIT_LIST_HEAD(&entry->list);
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (rcu_dereference(netlbl_domhsh_def) == NULL)
+			rcu_assign_pointer(netlbl_domhsh_def, entry);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	} else
+		ret_val = -EINVAL;
+	rcu_read_unlock();
+
+	if (ret_val != 0) {
+		switch (entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						       entry->domain) != 0)
+				BUG();
+			break;
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
+ * @entry: the entry to add
+ *
+ * Description:
+ * Adds a new default entry to the domain hash table and handles any updates
+ * to the lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
+{
+	return netlbl_domhsh_add(entry);
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain)
+{
+	int ret_val = -ENOENT;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain != NULL)
+		entry = netlbl_domhsh_search(domain, 0);
+	else
+		entry = netlbl_domhsh_search(domain, 1);
+	if (entry == NULL)
+		goto remove_return;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						     entry->domain);
+		if (ret_val != 0)
+			goto remove_return;
+		break;
+	}
+	ret_val = 0;
+	if (entry != rcu_dereference(netlbl_domhsh_def)) {
+		spin_lock(&netlbl_domhsh_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			list_del_rcu(&entry->list);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else {
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			rcu_assign_pointer(netlbl_domhsh_def, NULL);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	}
+	if (ret_val == 0)
+		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+
+remove_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove_default - Removes the default entry from the table
+ *
+ * Description:
+ * Removes/resets the default entry for the domain hash table and handles any
+ * updates to the lower level protocol handler (i.e. CIPSO).  Returns zero on
+ * success, non-zero on failure.
+ *
+ */
+int netlbl_domhsh_remove_default(void)
+{
+	return netlbl_domhsh_remove(NULL);
+}
+
+/**
+ * netlbl_domhsh_getentry - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain,
+ * return a pointer to a copy of the entry or NULL.  The caller is responsibile
+ * for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
+{
+	return netlbl_domhsh_search(domain, 1);
+}
+
+/**
+ * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
+ *
+ * Description:
+ * Dump the domain hash table into a buffer suitable for returning to an
+ * application in response to a NetLabel management DOMAIN message.  This
+ * function may fail if another process is growing the hash table at the same
+ * time.  The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes.  See netlabel.h for the DOMAIN message format.  Returns a
+ * pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *netlbl_domhsh_dump(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	ssize_t buf_len;
+	u32 bkt_iter;
+	u32 dom_cnt = 0;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+	struct netlbl_dom_map *list_iter;
+	ssize_t tmp_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			buf_len += NETLBL_LEN_U32 +
+				nla_total_size(strlen(list_iter->domain) + 1);
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				buf_len += 2 * NETLBL_LEN_U32;
+				break;
+			}
+			dom_cnt++;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
+		goto dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			tmp_len = nla_total_size(strlen(list_iter->domain) +
+						 1);
+			if (buf_len < NETLBL_LEN_U32 + tmp_len)
+				goto dump_failure;
+			if (nla_put_string(skb,
+					   NLA_STRING,
+					   list_iter->domain) != 0)
+				goto dump_failure;
+			if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
+				goto dump_failure;
+			buf_len -= NETLBL_LEN_U32 + tmp_len;
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				if (buf_len < 2 * NETLBL_LEN_U32)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->type) != 0)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->doi) != 0)
+					goto dump_failure;
+				buf_len -= 2 * NETLBL_LEN_U32;
+				break;
+			}
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+dump_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
+
+/**
+ * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
+ *
+ * Description:
+ * Dump the default domain mapping into a buffer suitable for returning to an
+ * application in response to a NetLabel management DEFDOMAIN message.  This
+ * function may fail if another process is changing the default domain mapping
+ * at the same time.  The returned sk_buff has room at the front of the
+ * skb_buff for @headroom bytes.  See netlabel.h for the DEFDOMAIN message
+ * format.  Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
+{
+	struct sk_buff *skb;
+	ssize_t buf_len;
+	struct netlbl_dom_map *entry;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	entry = rcu_dereference(netlbl_domhsh_def);
+	if (entry != NULL)
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			buf_len += 2 * NETLBL_LEN_U32;
+			break;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_default_failure;
+
+	if (entry != rcu_dereference(netlbl_domhsh_def))
+		goto dump_default_failure;
+	if (entry != NULL) {
+		if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
+			goto dump_default_failure;
+		buf_len -= NETLBL_LEN_U32;
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->type) != 0)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->doi) != 0)
+				goto dump_default_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+			break;
+		}
+	} else
+		nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
+	rcu_read_unlock();
+
+	return skb;
+
+dump_default_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
diff -puN /dev/null net/netlabel/netlabel_domainhash.h
--- /dev/null
+++ a/net/netlabel/netlabel_domainhash.h
@@ -0,0 +1,63 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_DOMAINHASH_H
+#define _NETLABEL_DOMAINHASH_H
+
+/* Domain hash table size */
+/* XXX - currently this number is an uneducated guess */
+#define NETLBL_DOMHSH_BITSIZE       7
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map {
+	char *domain;
+	u32 type;
+	union {
+		struct cipso_v4_doi *cipsov4;
+	} type_def;
+
+	u32 valid;
+	struct list_head list;
+	struct rcu_head rcu;
+};
+
+/* init function */
+int netlbl_domhsh_init(u32 size);
+
+/* Manipulate the domain hash table */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
+int netlbl_domhsh_remove_default(void);
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct sk_buff *netlbl_domhsh_dump(size_t headroom);
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom);
+
+#endif
diff -puN /dev/null net/netlabel/netlabel_kapi.c
--- /dev/null
+++ a/net/netlabel/netlabel_kapi.c
@@ -0,0 +1,231 @@
+/*
+ * NetLabel Kernel API
+ *
+ * This file defines the kernel API for the NetLabel system.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_user.h"
+
+/*
+ * LSM Functions
+ */
+
+/**
+ * netlbl_socket_setattr - Label a socket using the correct protocol
+ * @sock: the socket to label
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given socket using the security attributes
+ * specified in @secattr.  This function requires exclusive access to
+ * @sock->sk, which means it either needs to be in the process of being
+ * created or locked via lock_sock(sock->sk).  Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOENT;
+	struct netlbl_dom_map *dom_entry;
+
+	rcu_read_lock();
+	dom_entry = netlbl_domhsh_getentry(secattr->domain);
+	if (dom_entry == NULL)
+		goto socket_setattr_return;
+	switch (dom_entry->type) {
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_socket_setattr(sock,
+						  dom_entry->type_def.cipsov4,
+						  secattr);
+		break;
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = 0;
+		break;
+	default:
+		ret_val = -ENOENT;
+	}
+
+socket_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_socket_getattr - Determine the security attributes of a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given socket to see any NetLabel style labeling has been
+ * applied to the socket, if so it parses the socket label and returns the
+ * security attributes in @secattr.  Returns zero on success, negative values
+ * on failure.
+ *
+ */
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_socket_getattr(sock, secattr);
+	if (ret_val == 0)
+		return 0;
+
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_getattr - Determine the security attributes of a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given packet to see if a recognized form of packet labeling
+ * is present, if so it parses the packet label and returns the security
+ * attributes in @secattr.  Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_skbuff_getattr(skb, secattr);
+	if (ret_val == 0)
+		return 0;
+
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_err - Handle a LSM error on a sk_buff
+ * @skb: the packet
+ * @error: the error code
+ *
+ * Description:
+ * Deal with a LSM problem when handling the packet in @skb, typically this is
+ * a permission denied problem (-EACCES).  The correct action is determined
+ * according to the packet's labeling protocol.
+ *
+ */
+void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	if (CIPSO_V4_OPTEXIST(skb))
+		cipso_v4_error(skb, error, 0);
+}
+
+/**
+ * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
+ *
+ * Description:
+ * For all of the NetLabel protocols that support some form of label mapping
+ * cache, invalidate the cache.  Returns zero on success, negative values on
+ * error.
+ *
+ */
+void netlbl_cache_invalidate(void)
+{
+	cipso_v4_cache_invalidate();
+}
+
+/**
+ * netlbl_cache_add - Add an entry to a NetLabel protocol cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add the LSM security attributes for the given packet to the underlying
+ * NetLabel protocol's label mapping cache.  Returns zero on success, negative
+ * values on error.
+ *
+ */
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr)
+{
+	if (secattr->cache.data == NULL)
+		return -ENOMSG;
+
+	if (CIPSO_V4_OPTEXIST(skb))
+		return cipso_v4_cache_add(skb, secattr);
+
+	return -ENOMSG;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * netlbl_init - Initialize NetLabel
+ *
+ * Description:
+ * Perform the required NetLabel initialization before first use.
+ *
+ */
+static int __init netlbl_init(void)
+{
+	int ret_val;
+
+	printk(KERN_INFO "NetLabel: Initializing\n");
+	printk(KERN_INFO "NetLabel:  domain hash size = %u\n",
+	       (1 << NETLBL_DOMHSH_BITSIZE));
+	printk(KERN_INFO "NetLabel:  protocols ="
+	       " UNLABELED"
+	       " CIPSOv4"
+	       "\n");
+
+	ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_netlink_init();
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_unlabel_defconf();
+	if (ret_val != 0)
+		goto init_failure;
+	printk(KERN_INFO "NetLabel:  unlabeled traffic allowed by default\n");
+
+	return 0;
+
+init_failure:
+	panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
+}
+
+subsys_initcall(netlbl_init);
diff -puN /dev/null net/netlabel/netlabel_mgmt.c
--- /dev/null
+++ a/net/netlabel/netlabel_mgmt.c
@@ -0,0 +1,624 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_user.h"
+#include "netlabel_mgmt.h"
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_mgmt_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_MGMT_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table.  See netlabel.h for a description of the message format.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	struct netlbl_dom_map *entry = NULL;
+	u32 iter;
+	u32 tmp_val;
+	int tmp_size;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_failure;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_failure;
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) {
+		if (msg_len <= 0) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (entry == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		tmp_size = nla_len(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+		if (entry->domain == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		nla_strlcpy(entry->domain, msg_ptr, tmp_size);
+		entry->domain[tmp_size - 1] = '\0';
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		entry->type = tmp_val;
+		switch (tmp_val) {
+		case NETLBL_NLTYPE_UNLABELED:
+			ret_val = netlbl_domhsh_add(entry);
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (msg_len < NETLBL_LEN_U32) {
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+			/* We should be holding a rcu_read_lock() here
+			 * while we hold the result but since the entry
+			 * will always be deleted when the CIPSO DOI
+			 * is deleted we aren't going to keep the lock. */
+			rcu_read_lock();
+			entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+			if (entry->type_def.cipsov4 == NULL) {
+				rcu_read_unlock();
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			ret_val = netlbl_domhsh_add(entry);
+			rcu_read_unlock();
+			break;
+		default:
+			ret_val = -EINVAL;
+		}
+		if (ret_val != 0)
+			goto add_failure;
+	}
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+add_failure:
+	if (entry)
+		kfree(entry->domain);
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and remove the specified domain
+ * mappings.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	u32 iter;
+	int tmp_size;
+	unsigned char *domain;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto remove_return;
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count && msg_len > 0; iter++) {
+		if (msg_len <= 0) {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		tmp_size = nla_len(msg_ptr);
+		domain = nla_data(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len ||
+		    domain[tmp_size - 1] != '\0') {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		ret_val = netlbl_domhsh_remove(domain);
+		if (ret_val != 0)
+			goto remove_return;
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+	}
+
+	ret_val = 0;
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and dumps the domain hash table in a
+ * form suitable for use in a kernel generated LIST message.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL)
+		goto list_failure;
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_LIST);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_adddef - Handle an ADDDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADDDEF message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct netlbl_dom_map *entry = NULL;
+	u32 tmp_val;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto adddef_failure;
+	tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		ret_val = -ENOMEM;
+		goto adddef_failure;
+	}
+
+	entry->type = tmp_val;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = netlbl_domhsh_add_default(entry);
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		/* We should be holding a rcu_read_lock here while we
+		 * hold the result but since the entry will always be
+		 * deleted when the CIPSO DOI is deleted we are going
+		 * to skip the lock. */
+		rcu_read_lock();
+		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+		if (entry->type_def.cipsov4 == NULL) {
+			rcu_read_unlock();
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		ret_val = netlbl_domhsh_add_default(entry);
+		rcu_read_unlock();
+		break;
+	default:
+		ret_val = -EINVAL;
+	}
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+adddef_failure:
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_removedef - Handle a REMOVEDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVEDEF message and remove the default domain
+ * mapping.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto removedef_return;
+
+	ret_val = netlbl_domhsh_remove_default();
+
+removedef_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_listdef - Handle a LISTDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LISTDEF message and dumps the default domain
+ * mapping in a form suitable for use in a kernel generated LISTDEF message.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL)
+		goto listdef_failure;
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_LISTDEF);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto listdef_failure;
+
+	return 0;
+
+listdef_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_modules - Handle a MODULES message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated MODULES message and respond accordingly.
+ *
+ */
+static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	size_t data_size;
+	u32 mod_count;
+	struct sk_buff *ans_skb = NULL;
+
+	/* unlabeled + cipsov4 */
+	mod_count = 2;
+
+	data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
+	ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto modules_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_MODULES) == NULL)
+		goto modules_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
+	if (ret_val != 0)
+		goto modules_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto modules_failure;
+
+	return 0;
+
+modules_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_version - Handle a VERSION message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated VERSION message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb = NULL;
+
+	ans_skb = netlbl_netlink_alloc_skb(0,
+					   GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto version_failure;
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_VERSION) == NULL)
+		goto version_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION);
+	if (ret_val != 0)
+		goto version_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto version_failure;
+
+	return 0;
+
+version_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_mgmt_genl_c_add = {
+	.cmd = NLBL_MGMT_C_ADD,
+	.flags = 0,
+	.doit = netlbl_mgmt_add,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_remove = {
+	.cmd = NLBL_MGMT_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_mgmt_remove,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_list = {
+	.cmd = NLBL_MGMT_C_LIST,
+	.flags = 0,
+	.doit = netlbl_mgmt_list,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_adddef = {
+	.cmd = NLBL_MGMT_C_ADDDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_adddef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_removedef = {
+	.cmd = NLBL_MGMT_C_REMOVEDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_removedef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_listdef = {
+	.cmd = NLBL_MGMT_C_LISTDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_listdef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_modules = {
+	.cmd = NLBL_MGMT_C_MODULES,
+	.flags = 0,
+	.doit = netlbl_mgmt_modules,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_version = {
+	.cmd = NLBL_MGMT_C_VERSION,
+	.flags = 0,
+	.doit = netlbl_mgmt_version,
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_mgmt_genl_init - Register the NetLabel management component
+ *
+ * Description:
+ * Register the NetLabel management component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_mgmt_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_mgmt_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_add);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_remove);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_adddef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_removedef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_listdef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_modules);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_version);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
diff -puN /dev/null net/netlabel/netlabel_mgmt.h
--- /dev/null
+++ a/net/netlabel/netlabel_mgmt.h
@@ -0,0 +1,246 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_MGMT_H
+#define _NETLABEL_MGMT_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the management interface,
+ * all of which are preceeded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an applications message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a domain mapping to the NetLabel system.
+ *   The kernel should respond with an ACK.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+-------------------------+
+ *   | domain string (variable) | protocol type (32 bits) | ...
+ *   +--------------------------+-------------------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data                ... repeated
+ *   +-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a domain mapping from the NetLabel
+ *   system.  The kernel should ACK this message.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *     domain string: the domain string, NULL terminated
+ *
+ * o LIST:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o ADDDEF:
+ *   Sent by an application to set the default domain mapping for the NetLabel
+ *   system.  The kernel should respond with an ACK.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVEDEF:
+ *   Sent by an application to remove the default domain mapping from the
+ *   NetLabel system, there is no payload.  The kernel should ACK this message.
+ *
+ * o LISTDEF:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LISTDEF message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a
+ *   LISTDEF message either with a LISTDEF message on success or an ACK message
+ *   on failure.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o MODULES:
+ *   Sent by an application to request a list of configured NetLabel modules
+ *   in the kernel.  When sent by an application there is no payload.
+ *
+ *   +-------------------+
+ *   | modules (32 bits) | ...
+ *   +-------------------+
+ *
+ *     modules: the number of modules in the message, if this is an application
+ *              generated message and the value is zero then return a list of
+ *              the configured modules
+ *
+ *   +------------------+
+ *   | module (32 bits) | ... repeated
+ *   +------------------+
+ *
+ *     module: the module number as defined by NETLBL_NLTYPE_*
+ *
+ * o VERSION:
+ *   Sent by an application to request the NetLabel version string.  When sent
+ *   by an application there is no payload.  This message type is also used by
+ *   the kernel to respond to an VERSION request.
+ *
+ *   +-------------------+
+ *   | version (32 bits) |
+ *   +-------------------+
+ *
+ *     version: the protocol version number
+ *
+ */
+
+/* NetLabel Management commands */
+enum {
+	NLBL_MGMT_C_UNSPEC,
+	NLBL_MGMT_C_ACK,
+	NLBL_MGMT_C_ADD,
+	NLBL_MGMT_C_REMOVE,
+	NLBL_MGMT_C_LIST,
+	NLBL_MGMT_C_ADDDEF,
+	NLBL_MGMT_C_REMOVEDEF,
+	NLBL_MGMT_C_LISTDEF,
+	NLBL_MGMT_C_MODULES,
+	NLBL_MGMT_C_VERSION,
+	__NLBL_MGMT_C_MAX,
+};
+#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_mgmt_genl_init(void);
+
+#endif
diff -puN /dev/null net/netlabel/netlabel_unlabeled.c
--- /dev/null
+++ a/net/netlabel/netlabel_unlabeled.c
@@ -0,0 +1,253 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_user.h"
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+
+/* Accept unlabeled packets flag */
+static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_unlabel_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_UNLABELED_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_unlabel_accept - Handle an ACCEPT message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ACCEPT message and set the accept flag accordingly.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	struct nlattr *data = netlbl_netlink_payload_data(skb);
+	u32 value;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		return ret_val;
+
+	if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) {
+		value = nla_get_u32(data);
+		if (value == 1 || value == 0) {
+			atomic_set(&netlabel_unlabel_accept_flg, value);
+			netlbl_netlink_send_ack(info,
+						netlbl_unlabel_gnl_family.id,
+						NLBL_UNLABEL_C_ACK,
+						NETLBL_E_OK);
+			return 0;
+		}
+	}
+
+	netlbl_netlink_send_ack(info,
+				netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK,
+				EINVAL);
+	return -EINVAL;
+}
+
+/**
+ * netlbl_unlabel_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond with the current status.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_netlink_alloc_skb(0,
+					   GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto list_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_unlabel_gnl_family.id,
+				   NLBL_UNLABEL_C_LIST) == NULL)
+		goto list_failure;
+
+	ret_val = nla_put_u32(ans_skb,
+			      NLA_U32,
+			      atomic_read(&netlabel_unlabel_accept_flg));
+	if (ret_val != 0)
+		goto list_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_unlabel_genl_c_accept = {
+	.cmd = NLBL_UNLABEL_C_ACCEPT,
+	.flags = 0,
+	.doit = netlbl_unlabel_accept,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_list = {
+	.cmd = NLBL_UNLABEL_C_LIST,
+	.flags = 0,
+	.doit = netlbl_unlabel_list,
+	.dumpit = NULL,
+};
+
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component
+ *
+ * Description:
+ * Register the unlabeled packet NetLabel component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_unlabel_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_unlabel_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+				    &netlbl_unlabel_genl_c_accept);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+				    &netlbl_unlabel_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
+
+/*
+ * NetLabel KAPI Hooks
+ */
+
+/**
+ * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Determine the security attributes, if any, for an unlabled packet and return
+ * them in @secattr.  Returns zero on success and negative values on failure.
+ *
+ */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
+{
+	if (atomic_read(&netlabel_unlabel_accept_flg) == 1) {
+		memset(secattr, 0, sizeof(*secattr));
+		return 0;
+	}
+
+	return -ENOMSG;
+}
+
+/**
+ * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
+ *
+ * Description:
+ * Set the default NetLabel configuration to allow incoming unlabeled packets
+ * and to send unlabeled network traffic by default.
+ *
+ */
+int netlbl_unlabel_defconf(void)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->type = NETLBL_NLTYPE_UNLABELED;
+	ret_val = netlbl_domhsh_add_default(entry);
+	if (ret_val != 0)
+		return ret_val;
+
+	atomic_set(&netlabel_unlabel_accept_flg, 1);
+
+	return 0;
+}
diff -puN /dev/null net/netlabel/netlabel_unlabeled.h
--- /dev/null
+++ a/net/netlabel/netlabel_unlabeled.h
@@ -0,0 +1,98 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_UNLABELED_H
+#define _NETLABEL_UNLABELED_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the Unlabeled subsystem.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an applications message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ACCEPT
+ *   This message is sent from an application to specify if the kernel should
+ *   allow unlabled packets to pass if they do not match any of the static
+ *   mappings defined in the unlabeled module.
+ *
+ *   +-----------------+
+ *   | allow (32 bits) |
+ *   +-----------------+
+ *
+ *     allow: if true (1) then allow the packets to pass, if false (0) then
+ *            reject the packets
+ *
+ * o LIST
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-----------------------+
+ *   | accept flag (32 bits) |
+ *   +-----------------------+
+ *
+ *     accept flag: if true (1) then unlabeled packets are allowed to pass,
+ *                  if false (0) then unlabeled packets are rejected
+ *
+ */
+
+/* NetLabel Unlabeled commands */
+enum {
+	NLBL_UNLABEL_C_UNSPEC,
+	NLBL_UNLABEL_C_ACK,
+	NLBL_UNLABEL_C_ACCEPT,
+	NLBL_UNLABEL_C_LIST,
+	__NLBL_UNLABEL_C_MAX,
+};
+#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_unlabel_genl_init(void);
+
+/* Process Unlabeled incoming network packets */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
+
+/* Set the default configuration to allow Unlabeled packets */
+int netlbl_unlabel_defconf(void);
+
+#endif
diff -puN /dev/null net/netlabel/netlabel_user.c
--- /dev/null
+++ a/net/netlabel/netlabel_user.c
@@ -0,0 +1,158 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_cipso_v4.h"
+#include "netlabel_user.h"
+
+/*
+ * NetLabel NETLINK Setup Functions
+ */
+
+/**
+ * netlbl_netlink_init - Initialize the NETLINK communication channel
+ *
+ * Description:
+ * Call out to the NetLabel components so they can register their families and
+ * commands with the Generic NETLINK mechanism.  Returns zero on success and
+ * non-zero on failure.
+ *
+ */
+int netlbl_netlink_init(void)
+{
+	int ret_val;
+
+	ret_val = netlbl_mgmt_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = netlbl_cipsov4_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = netlbl_unlabel_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
+
+/*
+ * NetLabel Common Protocol Functions
+ */
+
+/**
+ * netlbl_netlink_send_ack - Send an ACK message
+ * @info: the generic NETLINK information
+ * @genl_family: the generic NETLINK family ID value
+ * @ack_cmd: the generic NETLINK family ACK command value
+ * @ret_code: return code to use
+ *
+ * Description:
+ * This function sends an ACK message to the sender of the NETLINK message
+ * specified by @info.
+ *
+ */
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code)
+{
+	size_t data_size;
+	struct sk_buff *skb;
+
+	data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32;
+	skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
+	if (skb == NULL)
+		return;
+
+	if (netlbl_netlink_hdr_put(skb,
+				   info->snd_pid,
+				   0,
+				   genl_family,
+				   ack_cmd) == NULL)
+		goto send_ack_failure;
+
+	if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0)
+		goto send_ack_failure;
+	if (nla_put_u32(skb, NLA_U32, ret_code) != 0)
+		goto send_ack_failure;
+
+	netlbl_netlink_snd(skb, info->snd_pid);
+	return;
+
+send_ack_failure:
+	kfree_skb(skb);
+}
+
+/*
+ * NETLINK I/O Functions
+ */
+
+/**
+ * netlbl_netlink_snd - Send a NetLabel message
+ * @skb: NetLabel message
+ * @pid: destination PID
+ *
+ * Description:
+ * Sends a unicast NetLabel message over the NETLINK socket.
+ *
+ */
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
+{
+	return genlmsg_unicast(skb, pid);
+}
+
+/**
+ * netlbl_netlink_snd - Send a NetLabel message
+ * @skb: NetLabel message
+ * @pid: sending PID
+ * @group: multicast group id
+ *
+ * Description:
+ * Sends a multicast NetLabel message over the NETLINK socket to all members
+ * of @group except @pid.
+ *
+ */
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
+{
+	return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
+}
diff -puN /dev/null net/netlabel/netlabel_user.h
--- /dev/null
+++ a/net/netlabel/netlabel_user.h
@@ -0,0 +1,214 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_USER_H
+#define _NETLABEL_USER_H
+
+#include <linux/skbuff.h>
+#include <linux/capability.h>
+#include <linux/genetlink.h>
+#include <net/netlabel.h>
+#include <net/genetlink.h>
+
+/* NetLabel NETLINK helper functions */
+
+/**
+ * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
+ * @skb: the NETLINK buffer
+ * @req_cap: the required capability
+ *
+ * Description:
+ * Check the NETLINK buffer's capabilities against the required capabilities.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
+					   kernel_cap_t req_cap)
+{
+	if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
+		return 0;
+	return -EPERM;
+}
+
+/**
+ * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u8 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
+{
+	u8 val = nla_get_u8(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u16 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
+{
+	u16 val = nla_get_u16(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u32 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
+{
+	u32 val = nla_get_u32(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the receipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.  Returns a pointer to the start of the payload buffer
+ * on success or NULL on failure.
+ *
+ */
+static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+{
+	return genlmsg_put(skb,
+			   pid,
+			   seq,
+			   type,
+			   0,
+			   0,
+			   cmd,
+			   NETLBL_PROTO_VERSION);
+}
+
+/**
+ * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the receipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.
+ *
+ */
+static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+
+{
+	struct nlmsghdr *nlh;
+	struct genlmsghdr *hdr;
+
+	nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
+	nlh->nlmsg_type = type;
+	nlh->nlmsg_len = skb->len;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_pid = pid;
+	nlh->nlmsg_seq = seq;
+
+	hdr = nlmsg_data(nlh);
+	hdr->cmd = cmd;
+	hdr->version = NETLBL_PROTO_VERSION;
+	hdr->reserved = 0;
+}
+
+/**
+ * netlbl_netlink_payload_len - Return the length of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns the length of the NetLabel payload.
+ *
+ */
+static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
+{
+	return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
+}
+
+/**
+ * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns a pointer to the start of the NetLabel payload.
+ *
+ */
+static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
+{
+  return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
+	  GENL_HDRLEN;
+}
+
+/* NetLabel common protocol functions */
+
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code);
+
+/* NetLabel NETLINK I/O functions */
+
+int netlbl_netlink_init(void);
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
+
+#endif
diff -puN net/netlink/af_netlink.c~git-net net/netlink/af_netlink.c
--- a/net/netlink/af_netlink.c~git-net
+++ a/net/netlink/af_netlink.c
@@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb 
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 	err = -ENOBUFS;
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = nlmsg_new(len, GFP_KERNEL);
 	if (skb==NULL)
 		goto out;
 
@@ -1342,19 +1342,18 @@ static int netlink_dump(struct sock *sk)
 	struct netlink_callback *cb;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
-	int len;
+	int len, err = -ENOBUFS;
 	
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
-		return -ENOBUFS;
+		goto errout;
 
 	spin_lock(&nlk->cb_lock);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
-		spin_unlock(&nlk->cb_lock);
-		kfree_skb(skb);
-		return -EINVAL;
+		err = -EINVAL;
+		goto errout_skb;
 	}
 
 	len = cb->dump(skb, cb);
@@ -1366,8 +1365,12 @@ static int netlink_dump(struct sock *sk)
 		return 0;
 	}
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
-	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
+	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
+	if (!nlh)
+		goto errout_skb;
+
+	memcpy(nlmsg_data(nlh), &len, sizeof(len));
+
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
 
@@ -1379,8 +1382,11 @@ static int netlink_dump(struct sock *sk)
 	netlink_destroy_callback(cb);
 	return 0;
 
-nlmsg_failure:
-	return -ENOBUFS;
+errout_skb:
+	spin_unlock(&nlk->cb_lock);
+	kfree_skb(skb);
+errout:
+	return err;
 }
 
 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1432,11 +1438,11 @@ void netlink_ack(struct sk_buff *in_skb,
 	int size;
 
 	if (err == 0)
-		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
+		size = nlmsg_total_size(sizeof(*errmsg));
 	else
-		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
+		size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
 
-	skb = alloc_skb(size, GFP_KERNEL);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -1452,16 +1458,15 @@ void netlink_ack(struct sk_buff *in_skb,
 
 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
-	errmsg = NLMSG_DATA(rep);
+	errmsg = nlmsg_data(rep);
 	errmsg->error = err;
-	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
+	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
 
 static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 						     struct nlmsghdr *, int *))
 {
-	unsigned int total_len;
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -1471,8 +1476,6 @@ static int netlink_rcv_skb(struct sk_buf
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
-		total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
-
 		if (cb(skb, nlh, &err) < 0) {
 			/* Not an error, but we have to interrupt processing
 			 * here. Note: that in this case we do not pull
@@ -1484,7 +1487,7 @@ static int netlink_rcv_skb(struct sk_buf
 		} else if (nlh->nlmsg_flags & NLM_F_ACK)
 			netlink_ack(skb, nlh, 0);
 
-		skb_pull(skb, total_len);
+		netlink_queue_skip(nlh, skb);
 	}
 
 	return 0;
@@ -1547,6 +1550,38 @@ void netlink_queue_skip(struct nlmsghdr 
 	skb_pull(skb, msglen);
 }
 
+/**
+ * nlmsg_notify - send a notification netlink message
+ * @sk: netlink socket to use
+ * @skb: notification message
+ * @pid: destination netlink pid for reports or 0
+ * @group: destination multicast group or 0
+ * @report: 1 to report back, 0 to disable
+ * @flags: allocation flags
+ */
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+		 unsigned int group, int report, gfp_t flags)
+{
+	int err = 0;
+
+	if (group) {
+		int exclude_pid = 0;
+
+		if (report) {
+			atomic_inc(&skb->users);
+			exclude_pid = pid;
+		}
+
+		/* errors reported via destination sk->sk_err */
+		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+	}
+
+	if (report)
+		err = nlmsg_unicast(sk, skb, pid);
+
+	return err;
+}
+
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
 	int link;
@@ -1801,4 +1836,4 @@ EXPORT_SYMBOL(netlink_set_err);
 EXPORT_SYMBOL(netlink_set_nonroot);
 EXPORT_SYMBOL(netlink_unicast);
 EXPORT_SYMBOL(netlink_unregister_notifier);
-
+EXPORT_SYMBOL(nlmsg_notify);
diff -puN net/netlink/attr.c~git-net net/netlink/attr.c
--- a/net/netlink/attr.c~git-net
+++ a/net/netlink/attr.c
@@ -255,6 +255,26 @@ struct nlattr *__nla_reserve(struct sk_b
 }
 
 /**
+ * __nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the payload.
+ */
+void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	void *start;
+
+	start = skb_put(skb, NLA_ALIGN(attrlen));
+	memset(start, 0, NLA_ALIGN(attrlen));
+
+	return start;
+}
+
+/**
  * nla_reserve - reserve room for attribute on the skb
  * @skb: socket buffer to reserve room on
  * @attrtype: attribute type
@@ -275,6 +295,24 @@ struct nlattr *nla_reserve(struct sk_buf
 }
 
 /**
+ * nla_reserve - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @len: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return NULL;
+
+	return __nla_reserve_nohdr(skb, attrlen);
+}
+
+/**
  * __nla_put - Add a netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
@@ -293,6 +331,22 @@ void __nla_put(struct sk_buff *skb, int 
 	memcpy(nla_data(nla), data, attrlen);
 }
 
+/**
+ * __nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute payload.
+ */
+void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	void *start;
+
+	start = __nla_reserve_nohdr(skb, attrlen);
+	memcpy(start, data, attrlen);
+}
 
 /**
  * nla_put - Add a netlink attribute to a socket buffer
@@ -313,15 +367,36 @@ int nla_put(struct sk_buff *skb, int att
 	return 0;
 }
 
+/**
+ * nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -1 if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return -1;
+
+	__nla_put_nohdr(skb, attrlen, data);
+	return 0;
+}
 
 EXPORT_SYMBOL(nla_validate);
 EXPORT_SYMBOL(nla_parse);
 EXPORT_SYMBOL(nla_find);
 EXPORT_SYMBOL(nla_strlcpy);
 EXPORT_SYMBOL(__nla_reserve);
+EXPORT_SYMBOL(__nla_reserve_nohdr);
 EXPORT_SYMBOL(nla_reserve);
+EXPORT_SYMBOL(nla_reserve_nohdr);
 EXPORT_SYMBOL(__nla_put);
+EXPORT_SYMBOL(__nla_put_nohdr);
 EXPORT_SYMBOL(nla_put);
+EXPORT_SYMBOL(nla_put_nohdr);
 EXPORT_SYMBOL(nla_memcpy);
 EXPORT_SYMBOL(nla_memcmp);
 EXPORT_SYMBOL(nla_strcmp);
diff -puN net/netlink/genetlink.c~git-net net/netlink/genetlink.c
--- a/net/netlink/genetlink.c~git-net
+++ a/net/netlink/genetlink.c
@@ -440,7 +440,7 @@ static struct sk_buff *ctrl_build_msg(st
 	struct sk_buff *skb;
 	int err;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE);
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
@@ -510,7 +510,7 @@ static int genl_ctrl_event(int event, vo
 		if (IS_ERR(msg))
 			return PTR_ERR(msg);
 
-		genlmsg_multicast(msg, 0, GENL_ID_CTRL);
+		genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL);
 		break;
 	}
 
diff -puN net/packet/af_packet.c~git-net net/packet/af_packet.c
--- a/net/packet/af_packet.c~git-net
+++ a/net/packet/af_packet.c
@@ -586,7 +586,7 @@ static int tpacket_rcv(struct sk_buff *s
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				status |= TP_STATUS_CSUMNOTREADY;
 		}
 	}
diff -puN net/sched/act_api.c~git-net net/sched/act_api.c
--- a/net/sched/act_api.c~git-net
+++ a/net/sched/act_api.c
@@ -459,7 +459,6 @@ static int
 act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 {
 	struct sk_buff *skb;
-	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -468,10 +467,8 @@ act_get_notify(u32 pid, struct nlmsghdr 
 		kfree_skb(skb);
 		return -EINVAL;
 	}
-	err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-	return err;
+
+	return rtnl_unicast(skb, pid);
 }
 
 static struct tc_action *
diff -puN net/sched/sch_htb.c~git-net net/sched/sch_htb.c
--- a/net/sched/sch_htb.c~git-net
+++ a/net/sched/sch_htb.c
@@ -1,4 +1,4 @@
-/* vim: ts=8 sw=8
+/*
  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
  *
  *		This program is free software; you can redistribute it and/or
@@ -68,218 +68,165 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16	/* classid hash size */
-#define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#undef HTB_DEBUG	/* compile debugging support (activated by tc tool) */
-#define HTB_RATECM 1    /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
-#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30011	/* major must be matched with number suplied by TC as version */
+#define HTB_HSIZE 16		/* classid hash size */
+#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
-/* debugging support; S is subsystem, these are defined:
-  0 - netlink messages
-  1 - enqueue
-  2 - drop & requeue
-  3 - dequeue main
-  4 - dequeue one prio DRR part
-  5 - dequeue class accounting
-  6 - class overlimit status computation
-  7 - hint tree
-  8 - event queue
- 10 - rate estimator
- 11 - classifier 
- 12 - fast dequeue cache
-
- L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
- q->debug uint32 contains 16 2-bit fields one for subsystem starting
- from LSB
- */
-#ifdef HTB_DEBUG
-#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
-#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
-	printk(KERN_DEBUG FMT,##ARG)
-#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
-#define HTB_PASSQ q,
-#define HTB_ARGQ struct htb_sched *q,
-#define static
-#undef __inline__
-#define __inline__
-#undef inline
-#define inline
-#define HTB_CMAGIC 0xFEFAFEF1
-#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
-		if ((N)->rb_color == -1) break; \
-		rb_erase(N,R); \
-		(N)->rb_color = -1; } while (0)
-#else
-#define HTB_DBG_COND(S,L) (0)
-#define HTB_DBG(S,L,FMT,ARG...)
-#define HTB_PASSQ
-#define HTB_ARGQ
-#define HTB_CHCL(cl)
-#define htb_safe_rb_erase(N,R) rb_erase(N,R)
-#endif
-
-
 /* used internaly to keep status of single class */
 enum htb_cmode {
-    HTB_CANT_SEND,		/* class can't send and can't borrow */
-    HTB_MAY_BORROW,		/* class can't send but may borrow */
-    HTB_CAN_SEND		/* class can send */
+	HTB_CANT_SEND,		/* class can't send and can't borrow */
+	HTB_MAY_BORROW,		/* class can't send but may borrow */
+	HTB_CAN_SEND		/* class can send */
 };
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
-struct htb_class
-{
-#ifdef HTB_DEBUG
-	unsigned magic;
-#endif
-    /* general class parameters */
-    u32 classid;
-    struct gnet_stats_basic bstats;
-    struct gnet_stats_queue qstats;
-    struct gnet_stats_rate_est rate_est;
-    struct tc_htb_xstats xstats;/* our special stats */
-    int refcnt;			/* usage count of this class */
+struct htb_class {
+	/* general class parameters */
+	u32 classid;
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue qstats;
+	struct gnet_stats_rate_est rate_est;
+	struct tc_htb_xstats xstats;	/* our special stats */
+	int refcnt;		/* usage count of this class */
 
 #ifdef HTB_RATECM
-    /* rate measurement counters */
-    unsigned long rate_bytes,sum_bytes;
-    unsigned long rate_packets,sum_packets;
-#endif
-
-    /* topology */
-    int level;			/* our level (see above) */
-    struct htb_class *parent;	/* parent class */
-    struct list_head hlist;	/* classid hash list item */
-    struct list_head sibling;	/* sibling list item */
-    struct list_head children;	/* children list */
-
-    union {
-	    struct htb_class_leaf {
-		    struct Qdisc *q;
-		    int prio;
-		    int aprio;	
-		    int quantum;
-		    int deficit[TC_HTB_MAXDEPTH];
-		    struct list_head drop_list;
-	    } leaf;
-	    struct htb_class_inner {
-		    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
-		    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
-            /* When class changes from state 1->2 and disconnects from 
-               parent's feed then we lost ptr value and start from the
-              first child again. Here we store classid of the
-              last valid ptr (used when ptr is NULL). */
-              u32 last_ptr_id[TC_HTB_NUMPRIO];
-	    } inner;
-    } un;
-    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
-    struct rb_node pq_node;		 /* node for event queue */
-    unsigned long pq_key;	/* the same type as jiffies global */
-    
-    int prio_activity;		/* for which prios are we active */
-    enum htb_cmode cmode;	/* current mode of the class */
-
-    /* class attached filters */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
-
-    int warned;		/* only one warning about non work conserving .. */
-
-    /* token bucket parameters */
-    struct qdisc_rate_table *rate;	/* rate table of the class itself */
-    struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
-    long buffer,cbuffer;		/* token bucket depth/rate */
-    psched_tdiff_t mbuffer;		/* max wait time */
-    long tokens,ctokens;		/* current number of tokens */
-    psched_time_t t_c;			/* checkpoint time */
+	/* rate measurement counters */
+	unsigned long rate_bytes, sum_bytes;
+	unsigned long rate_packets, sum_packets;
+#endif
+
+	/* topology */
+	int level;		/* our level (see above) */
+	struct htb_class *parent;	/* parent class */
+	struct hlist_node hlist;	/* classid hash list item */
+	struct list_head sibling;	/* sibling list item */
+	struct list_head children;	/* children list */
+
+	union {
+		struct htb_class_leaf {
+			struct Qdisc *q;
+			int prio;
+			int aprio;
+			int quantum;
+			int deficit[TC_HTB_MAXDEPTH];
+			struct list_head drop_list;
+		} leaf;
+		struct htb_class_inner {
+			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
+			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
+			/* When class changes from state 1->2 and disconnects from
+			   parent's feed then we lost ptr value and start from the
+			   first child again. Here we store classid of the
+			   last valid ptr (used when ptr is NULL). */
+			u32 last_ptr_id[TC_HTB_NUMPRIO];
+		} inner;
+	} un;
+	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
+	struct rb_node pq_node;	/* node for event queue */
+	unsigned long pq_key;	/* the same type as jiffies global */
+
+	int prio_activity;	/* for which prios are we active */
+	enum htb_cmode cmode;	/* current mode of the class */
+
+	/* class attached filters */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
+
+	int warned;		/* only one warning about non work conserving .. */
+
+	/* token bucket parameters */
+	struct qdisc_rate_table *rate;	/* rate table of the class itself */
+	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
+	long buffer, cbuffer;	/* token bucket depth/rate */
+	psched_tdiff_t mbuffer;	/* max wait time */
+	long tokens, ctokens;	/* current number of tokens */
+	psched_time_t t_c;	/* checkpoint time */
 };
 
 /* TODO: maybe compute rate when size is too large .. or drop ? */
-static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
-	int size)
-{ 
-    int slot = size >> rate->rate.cell_log;
-    if (slot > 255) {
-	cl->xstats.giants++;
-	slot = 255;
-    }
-    return rate->data[slot];
-}
-
-struct htb_sched
-{
-    struct list_head root;			/* root classes list */
-    struct list_head hash[HTB_HSIZE];		/* hashed by classid */
-    struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
-    
-    /* self list - roots of self generating tree */
-    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    int row_mask[TC_HTB_MAXDEPTH];
-    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-
-    /* self wait list - roots of wait PQs per row */
-    struct rb_root wait_pq[TC_HTB_MAXDEPTH];
-
-    /* time of nearest event per level (row) */
-    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
-    /* cached value of jiffies in dequeue */
-    unsigned long jiffies;
-
-    /* whether we hit non-work conserving class during this dequeue; we use */
-    int nwc_hit;	/* this to disable mindelay complaint in dequeue */
-
-    int defcls;		/* class where unclassified flows go to */
-    u32 debug;		/* subsystem debug levels */
-
-    /* filters for qdisc itself */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
-
-    int rate2quantum;		/* quant = rate / rate2quantum */
-    psched_time_t now;		/* cached dequeue time */
-    struct timer_list timer;	/* send delay timer */
+static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
+			   int size)
+{
+	int slot = size >> rate->rate.cell_log;
+	if (slot > 255) {
+		cl->xstats.giants++;
+		slot = 255;
+	}
+	return rate->data[slot];
+}
+
+struct htb_sched {
+	struct list_head root;	/* root classes list */
+	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
+
+	/* self list - roots of self generating tree */
+	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	int row_mask[TC_HTB_MAXDEPTH];
+	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+
+	/* self wait list - roots of wait PQs per row */
+	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+
+	/* time of nearest event per level (row) */
+	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+
+	/* cached value of jiffies in dequeue */
+	unsigned long jiffies;
+
+	/* whether we hit non-work conserving class during this dequeue; we use */
+	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
+
+	int defcls;		/* class where unclassified flows go to */
+
+	/* filters for qdisc itself */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
+
+	int rate2quantum;	/* quant = rate / rate2quantum */
+	psched_time_t now;	/* cached dequeue time */
+	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-    struct timer_list rttim;	/* rate computer timer */
-    int recmp_bucket;		/* which hash bucket to recompute next */
+	struct timer_list rttim;	/* rate computer timer */
+	int recmp_bucket;	/* which hash bucket to recompute next */
 #endif
-    
-    /* non shaped skbs; let them go directly thru */
-    struct sk_buff_head direct_queue;
-    int direct_qlen;  /* max qlen of above */
 
-    long direct_pkts;
+	/* non shaped skbs; let them go directly thru */
+	struct sk_buff_head direct_queue;
+	int direct_qlen;	/* max qlen of above */
+
+	long direct_pkts;
 };
 
 /* compute hash of size HTB_HSIZE for given handle */
-static __inline__ int htb_hash(u32 h) 
+static inline int htb_hash(u32 h)
 {
 #if HTB_HSIZE != 16
- #error "Declare new hash for your HTB_HSIZE"
+#error "Declare new hash for your HTB_HSIZE"
 #endif
-    h ^= h>>8;	/* stolen from cbq_hash */
-    h ^= h>>4;
-    return h & 0xf;
+	h ^= h >> 8;		/* stolen from cbq_hash */
+	h ^= h >> 4;
+	return h & 0xf;
 }
 
 /* find class in global hash table using given handle */
-static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
-	if (TC_H_MAJ(handle) != sch->handle) 
+	struct hlist_node *p;
+	struct htb_class *cl;
+
+	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
-	
-	list_for_each (p,q->hash+htb_hash(handle)) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+
+	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
 		if (cl->classid == handle)
 			return cl;
 	}
@@ -304,7 +251,8 @@ static inline u32 htb_classid(struct htb
 	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
 }
 
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl;
@@ -316,8 +264,8 @@ static struct htb_class *htb_classify(st
 	   note that nfmark can be used too by attaching filter fw with no
 	   rules in it */
 	if (skb->priority == sch->handle)
-		return HTB_DIRECT;  /* X:0 (direct flow) selected */
-	if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
+		return HTB_DIRECT;	/* X:0 (direct flow) selected */
+	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 		return cl;
 
 	*qerr = NET_XMIT_BYPASS;
@@ -326,7 +274,7 @@ static struct htb_class *htb_classify(st
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN: 
+		case TC_ACT_STOLEN:
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -335,97 +283,44 @@ static struct htb_class *htb_classify(st
 		if (result == TC_POLICE_SHOT)
 			return HTB_DIRECT;
 #endif
-		if ((cl = (void*)res.class) == NULL) {
+		if ((cl = (void *)res.class) == NULL) {
 			if (res.classid == sch->handle)
-				return HTB_DIRECT;  /* X:0 (direct flow) */
-			if ((cl = htb_find(res.classid,sch)) == NULL)
-				break; /* filter selected invalid classid */
+				return HTB_DIRECT;	/* X:0 (direct flow) */
+			if ((cl = htb_find(res.classid, sch)) == NULL)
+				break;	/* filter selected invalid classid */
 		}
 		if (!cl->level)
-			return cl; /* we hit leaf; return it */
+			return cl;	/* we hit leaf; return it */
 
 		/* we have got inner class; apply inner filter chain */
 		tcf = cl->filter_list;
 	}
 	/* classification failed; try to use default class */
-	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 	if (!cl || cl->level)
-		return HTB_DIRECT; /* bad default .. this is safe bet */
+		return HTB_DIRECT;	/* bad default .. this is safe bet */
 	return cl;
 }
 
-#ifdef HTB_DEBUG
-static void htb_next_rb_node(struct rb_node **n);
-#define HTB_DUMTREE(root,memb) if(root) { \
-	struct rb_node *n = (root)->rb_node; \
-	while (n->rb_left) n = n->rb_left; \
-	while (n) { \
-		struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
-		printk(" %x",cl->classid); htb_next_rb_node (&n); \
-	} }
-
-static void htb_debug_dump (struct htb_sched *q)
-{
-	int i,p;
-	printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
-	/* rows */
-	for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
-		printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
-		for (p=0;p<TC_HTB_NUMPRIO;p++) {
-			if (!q->row[i][p].rb_node) continue;
-			printk(" p%d:",p);
-			HTB_DUMTREE(q->row[i]+p,node[p]);
-		}
-		printk("\n");
-	}
-	/* classes */
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *l;
-		list_for_each (l,q->hash+i) {
-			struct htb_class *cl = list_entry(l,struct htb_class,hlist);
-			long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-			printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
-					"pa=%x f:",
-				cl->classid,cl->cmode,cl->tokens,cl->ctokens,
-				cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
-				cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
-			if (cl->level)
-			for (p=0;p<TC_HTB_NUMPRIO;p++) {
-				if (!cl->un.inner.feed[p].rb_node) continue;
-				printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
-				HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
-			}
-			printk("\n");
-		}
-	}
-}
-#endif
 /**
  * htb_add_to_id_tree - adds class to the round robin list
  *
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.
  */
-static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
-		struct htb_class *cl,int prio)
+static void htb_add_to_id_tree(struct rb_root *root,
+			       struct htb_class *cl, int prio)
 {
 	struct rb_node **p = &root->rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
-#ifdef HTB_DEBUG
-	if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if (*p) {
-		struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
-		HTB_CHCL(x);
-	}
-#endif
+
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, node[prio]);
-		HTB_CHCL(c);
+
 		if (cl->classid > c->classid)
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->node[prio], parent, p);
@@ -439,17 +334,11 @@ static void htb_add_to_id_tree (HTB_ARGQ
  * change its mode in cl->pq_key microseconds. Make sure that class is not
  * already in the queue.
  */
-static void htb_add_to_wait_tree (struct htb_sched *q,
-		struct htb_class *cl,long delay,int debug_hint)
+static void htb_add_to_wait_tree(struct htb_sched *q,
+				 struct htb_class *cl, long delay)
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
-#ifdef HTB_DEBUG
-	if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
-		printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
-#endif
+
 	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
 	if (cl->pq_key == q->jiffies)
 		cl->pq_key++;
@@ -457,13 +346,14 @@ static void htb_add_to_wait_tree (struct
 	/* update the nearest event cache */
 	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
 		q->near_ev_cache[cl->level] = cl->pq_key;
-	
+
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, pq_node);
 		if (time_after_eq(cl->pq_key, c->pq_key))
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->pq_node, parent, p);
@@ -476,7 +366,7 @@ static void htb_add_to_wait_tree (struct
  * When we are past last key we return NULL.
  * Average complexity is 2 steps per call.
  */
-static void htb_next_rb_node(struct rb_node **n)
+static inline void htb_next_rb_node(struct rb_node **n)
 {
 	*n = rb_next(*n);
 }
@@ -487,42 +377,51 @@ static void htb_next_rb_node(struct rb_n
  * The class is added to row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static inline void htb_add_class_to_row(struct htb_sched *q, 
-		struct htb_class *cl,int mask)
+static inline void htb_add_class_to_row(struct htb_sched *q,
+					struct htb_class *cl, int mask)
 {
-	HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
-			cl->classid,mask,q->row_mask[cl->level]);
-	HTB_CHCL(cl);
 	q->row_mask[cl->level] |= mask;
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
-		htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 	}
 }
 
+/* If this triggers, it is a bug in this code, but it need not be fatal */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+	if (RB_EMPTY_NODE(rb)) {
+		WARN_ON(1);
+	} else {
+		rb_erase(rb, root);
+		RB_CLEAR_NODE(rb);
+	}
+}
+
+
 /**
  * htb_remove_class_from_row - removes class from its row
  *
  * The class is removed from row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
-		struct htb_class *cl,int mask)
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+						 struct htb_class *cl, int mask)
 {
 	int m = 0;
-	HTB_CHCL(cl);
+
 	while (mask) {
 		int prio = ffz(~mask);
+
 		mask &= ~(1 << prio);
-		if (q->ptr[cl->level][prio] == cl->node+prio)
-			htb_next_rb_node(q->ptr[cl->level]+prio);
-		htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
-		if (!q->row[cl->level][prio].rb_node) 
+		if (q->ptr[cl->level][prio] == cl->node + prio)
+			htb_next_rb_node(q->ptr[cl->level] + prio);
+
+		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
+		if (!q->row[cl->level][prio].rb_node)
 			m |= 1 << prio;
 	}
-	HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
-			cl->classid,mask,q->row_mask[cl->level],m);
 	q->row_mask[cl->level] &= ~m;
 }
 
@@ -533,34 +432,31 @@ static __inline__ void htb_remove_class_
  * for priorities it is participating on. cl->cmode must be new 
  * (activated) mode. It does nothing if cl->prio_activity == 0.
  */
-static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		HTB_CHCL(p);
-		m = mask; while (m) {
+		m = mask;
+		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
+
 			if (p->un.inner.feed[prio].rb_node)
 				/* parent already has its feed in use so that
 				   reset bit in mask as parent is already ok */
 				mask &= ~(1 << prio);
-			
-			htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+
+			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 		}
-		HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
 		p->prio_activity |= mask;
-		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+		cl = p;
+		p = cl->parent;
+
 	}
 	if (cl->cmode == HTB_CAN_SEND && mask)
-		htb_add_class_to_row(q,cl,mask);
+		htb_add_class_to_row(q, cl, mask);
 }
 
 /**
@@ -573,39 +469,52 @@ static void htb_activate_prios(struct ht
 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		m = mask; mask = 0; 
+		m = mask;
+		mask = 0;
 		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
-			if (p->un.inner.ptr[prio] == cl->node+prio) {
+
+			if (p->un.inner.ptr[prio] == cl->node + prio) {
 				/* we are removing child which is pointed to from
 				   parent feed - forget the pointer but remember
 				   classid */
 				p->un.inner.last_ptr_id[prio] = cl->classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
-			
-			htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
-			
-			if (!p->un.inner.feed[prio].rb_node) 
+
+			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+			if (!p->un.inner.feed[prio].rb_node)
 				mask |= 1 << prio;
 		}
-		HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
+
 		p->prio_activity &= ~mask;
-		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+		cl = p;
+		p = cl->parent;
+
 	}
-	if (cl->cmode == HTB_CAN_SEND && mask) 
-		htb_remove_class_from_row(q,cl,mask);
+	if (cl->cmode == HTB_CAN_SEND && mask)
+		htb_remove_class_from_row(q, cl, mask);
 }
 
+#if HTB_HYSTERESIS
+static inline long htb_lowater(const struct htb_class *cl)
+{
+	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)
+{
+	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
+}
+#else
+#define htb_lowater(cl)	(0)
+#define htb_hiwater(cl)	(0)
+#endif
+
 /**
  * htb_class_mode - computes and returns current class mode
  *
@@ -617,28 +526,21 @@ static void htb_deactivate_prios(struct 
  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
  * mode transitions per time unit. The speed gain is about 1/6.
  */
-static __inline__ enum htb_cmode 
-htb_class_mode(struct htb_class *cl,long *diff)
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
 {
-    long toks;
+	long toks;
 
-    if ((toks = (cl->ctokens + *diff)) < (
-#if HTB_HYSTERESIS
-	    cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
-#endif
-       	    0)) {
-	    *diff = -toks;
-	    return HTB_CANT_SEND;
-    }
-    if ((toks = (cl->tokens + *diff)) >= (
-#if HTB_HYSTERESIS
-	    cl->cmode == HTB_CAN_SEND ? -cl->buffer :
-#endif
-	    0))
-	    return HTB_CAN_SEND;
+	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+		*diff = -toks;
+		return HTB_CANT_SEND;
+	}
 
-    *diff = -toks;
-    return HTB_MAY_BORROW;
+	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+		return HTB_CAN_SEND;
+
+	*diff = -toks;
+	return HTB_MAY_BORROW;
 }
 
 /**
@@ -650,24 +552,21 @@ htb_class_mode(struct htb_class *cl,long
  * be different from old one and cl->pq_key has to be valid if changing
  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
  */
-static void 
+static void
 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
-{ 
-	enum htb_cmode new_mode = htb_class_mode(cl,diff);
-	
-	HTB_CHCL(cl);
-	HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
+{
+	enum htb_cmode new_mode = htb_class_mode(cl, diff);
 
 	if (new_mode == cl->cmode)
-		return;	
-	
-	if (cl->prio_activity) { /* not necessary: speed optimization */
-		if (cl->cmode != HTB_CANT_SEND) 
-			htb_deactivate_prios(q,cl);
+		return;
+
+	if (cl->prio_activity) {	/* not necessary: speed optimization */
+		if (cl->cmode != HTB_CANT_SEND)
+			htb_deactivate_prios(q, cl);
 		cl->cmode = new_mode;
-		if (new_mode != HTB_CANT_SEND) 
-			htb_activate_prios(q,cl);
-	} else 
+		if (new_mode != HTB_CANT_SEND)
+			htb_activate_prios(q, cl);
+	} else
 		cl->cmode = new_mode;
 }
 
@@ -678,14 +577,15 @@ htb_change_class_mode(struct htb_sched *
  * for the prio. It can be called on already active leaf safely.
  * It also adds leaf into droplist.
  */
-static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
-	HTB_CHCL(cl);
+
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
-		htb_activate_prios(q,cl);
-		list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+		htb_activate_prios(q, cl);
+		list_add_tail(&cl->un.leaf.drop_list,
+			      q->drops + cl->un.leaf.aprio);
 	}
 }
 
@@ -695,120 +595,120 @@ static __inline__ void htb_activate(stru
  * Make sure that leaf is active. In the other words it can't be called
  * with non-active leaf. It also removes class from the drop list.
  */
-static __inline__ void 
-htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(cl->prio_activity);
-	HTB_CHCL(cl);
-	htb_deactivate_prios(q,cl);
+
+	htb_deactivate_prios(q, cl);
 	cl->prio_activity = 0;
 	list_del_init(&cl->un.leaf.drop_list);
 }
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    int ret;
-    struct htb_sched *q = qdisc_priv(sch);
-    struct htb_class *cl = htb_classify(skb,sch,&ret);
-
-    if (cl == HTB_DIRECT) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen) {
-	    __skb_queue_tail(&q->direct_queue, skb);
-	    q->direct_pkts++;
-	} else {
-	    kfree_skb(skb);
-	    sch->qstats.drops++;
-	    return NET_XMIT_DROP;
-	}
+	int ret;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
+
+	if (cl == HTB_DIRECT) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen) {
+			__skb_queue_tail(&q->direct_queue, skb);
+			q->direct_pkts++;
+		} else {
+			kfree_skb(skb);
+			sch->qstats.drops++;
+			return NET_XMIT_DROP;
+		}
 #ifdef CONFIG_NET_CLS_ACT
-    } else if (!cl) {
-	if (ret == NET_XMIT_BYPASS)
-		sch->qstats.drops++;
-	kfree_skb (skb);
-	return ret;
+	} else if (!cl) {
+		if (ret == NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
 #endif
-    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else {
-	cl->bstats.packets++; cl->bstats.bytes += skb->len;
-	htb_activate (q,cl);
-    }
-
-    sch->q.qlen++;
-    sch->bstats.packets++; sch->bstats.bytes += skb->len;
-    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
+	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else {
+		cl->bstats.packets++;
+		cl->bstats.bytes += skb->len;
+		htb_activate(q, cl);
+	}
+
+	sch->q.qlen++;
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+	return NET_XMIT_SUCCESS;
 }
 
 /* TODO: requeuing packet charges it to policers again !! */
 static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    struct htb_sched *q = qdisc_priv(sch);
-    int ret =  NET_XMIT_SUCCESS;
-    struct htb_class *cl = htb_classify(skb,sch, &ret);
-    struct sk_buff *tskb;
-
-    if (cl == HTB_DIRECT || !cl) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen && cl) {
-	    __skb_queue_head(&q->direct_queue, skb);
-	} else {
-            __skb_queue_head(&q->direct_queue, skb);
-            tskb = __skb_dequeue_tail(&q->direct_queue);
-            kfree_skb (tskb);
-            sch->qstats.drops++;
-            return NET_XMIT_CN;	
-	}
-    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else 
-	    htb_activate (q,cl);
-
-    sch->q.qlen++;
-    sch->qstats.requeues++;
-    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
+	struct htb_sched *q = qdisc_priv(sch);
+	int ret = NET_XMIT_SUCCESS;
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
+	struct sk_buff *tskb;
+
+	if (cl == HTB_DIRECT || !cl) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen && cl) {
+			__skb_queue_head(&q->direct_queue, skb);
+		} else {
+			__skb_queue_head(&q->direct_queue, skb);
+			tskb = __skb_dequeue_tail(&q->direct_queue);
+			kfree_skb(tskb);
+			sch->qstats.drops++;
+			return NET_XMIT_CN;
+		}
+	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else
+		htb_activate(q, cl);
+
+	sch->q.qlen++;
+	sch->qstats.requeues++;
+	return NET_XMIT_SUCCESS;
 }
 
 static void htb_timer(unsigned long arg)
 {
-    struct Qdisc *sch = (struct Qdisc*)arg;
-    sch->flags &= ~TCQ_F_THROTTLED;
-    wmb();
-    netif_schedule(sch->dev);
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	sch->flags &= ~TCQ_F_THROTTLED;
+	wmb();
+	netif_schedule(sch->dev);
 }
 
 #ifdef HTB_RATECM
 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
 static void htb_rate_timer(unsigned long arg)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
+	struct hlist_node *p;
+	struct htb_class *cl;
+
 
 	/* lock queue so that we can muck with it */
-	HTB_QLOCK(sch);
-	HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
+	spin_lock_bh(&sch->dev->queue_lock);
 
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
 
 	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE) 
+	if (++q->recmp_bucket >= HTB_HSIZE)
 		q->recmp_bucket = 0;
-	list_for_each (p,q->hash+q->recmp_bucket) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
-		HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
-				cl->classid,cl->sum_bytes,cl->sum_packets);
-		RT_GEN (cl->sum_bytes,cl->rate_bytes);
-		RT_GEN (cl->sum_packets,cl->rate_packets);
+
+	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
+		RT_GEN(cl->sum_bytes, cl->rate_bytes);
+		RT_GEN(cl->sum_packets, cl->rate_packets);
 	}
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
 
@@ -823,12 +723,11 @@ static void htb_rate_timer(unsigned long
  * CAN_SEND) because we can use more precise clock that event queue here.
  * In such case we remove class from event queue first.
  */
-static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
-		int level,int bytes)
-{	
-	long toks,diff;
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+			     int level, int bytes)
+{
+	long toks, diff;
 	enum htb_cmode old_mode;
-	HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
 
 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 	if (toks > cl->B) toks = cl->B; \
@@ -837,47 +736,31 @@ static void htb_charge_class(struct htb_
 	cl->T = toks
 
 	while (cl) {
-		HTB_CHCL(cl);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
 		if (cl->level >= level) {
-			if (cl->level == level) cl->xstats.lends++;
-			HTB_ACCNT (tokens,buffer,rate);
+			if (cl->level == level)
+				cl->xstats.lends++;
+			HTB_ACCNT(tokens, buffer, rate);
 		} else {
 			cl->xstats.borrows++;
-			cl->tokens += diff; /* we moved t_c; update tokens */
+			cl->tokens += diff;	/* we moved t_c; update tokens */
 		}
-		HTB_ACCNT (ctokens,cbuffer,ceil);
+		HTB_ACCNT(ctokens, cbuffer, ceil);
 		cl->t_c = q->now;
-		HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
 
-		old_mode = cl->cmode; diff = 0;
-		htb_change_class_mode(q,cl,&diff);
+		old_mode = cl->cmode;
+		diff = 0;
+		htb_change_class_mode(q, cl, &diff);
 		if (old_mode != cl->cmode) {
 			if (old_mode != HTB_CAN_SEND)
-				htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 			if (cl->cmode != HTB_CAN_SEND)
-				htb_add_to_wait_tree (q,cl,diff,1);
+				htb_add_to_wait_tree(q, cl, diff);
 		}
-		
 #ifdef HTB_RATECM
 		/* update rate counters */
-		cl->sum_bytes += bytes; cl->sum_packets++;
+		cl->sum_bytes += bytes;
+		cl->sum_packets++;
 #endif
 
 		/* update byte stats except for leaves which are already updated */
@@ -896,60 +779,46 @@ static void htb_charge_class(struct htb_
  * next pending event (0 for no event in pq).
  * Note: Aplied are events whose have cl->pq_key <= jiffies.
  */
-static long htb_do_events(struct htb_sched *q,int level)
+static long htb_do_events(struct htb_sched *q, int level)
 {
 	int i;
-	HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
-			level,q->wait_pq[level].rb_node,q->row_mask[level]);
+
 	for (i = 0; i < 500; i++) {
 		struct htb_class *cl;
 		long diff;
 		struct rb_node *p = q->wait_pq[level].rb_node;
-		if (!p) return 0;
-		while (p->rb_left) p = p->rb_left;
+		if (!p)
+			return 0;
+		while (p->rb_left)
+			p = p->rb_left;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
 		if (time_after(cl->pq_key, q->jiffies)) {
-			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
 			return cl->pq_key - q->jiffies;
 		}
-		htb_safe_rb_erase(p,q->wait_pq+level);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
-		htb_change_class_mode(q,cl,&diff);
+		htb_safe_rb_erase(p, q->wait_pq + level);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
-			htb_add_to_wait_tree (q,cl,diff,2);
+			htb_add_to_wait_tree(q, cl, diff);
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
-	return HZ/10;
+	return HZ / 10;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
    is no such one exists. */
-static struct rb_node *
-htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+					      u32 id)
 {
 	struct rb_node *r = NULL;
 	while (n) {
-		struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
-		if (id == cl->classid) return n;
-		
+		struct htb_class *cl =
+		    rb_entry(n, struct htb_class, node[prio]);
+		if (id == cl->classid)
+			return n;
+
 		if (id > cl->classid) {
 			n = n->rb_right;
 		} else {
@@ -965,49 +834,49 @@ htb_id_find_next_upper(int prio,struct r
  *
  * Find leaf where current feed pointers points to.
  */
-static struct htb_class *
-htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+					 struct rb_node **pptr, u32 * pid)
 {
 	int i;
 	struct {
 		struct rb_node *root;
 		struct rb_node **pptr;
 		u32 *pid;
-	} stk[TC_HTB_MAXDEPTH],*sp = stk;
-	
+	} stk[TC_HTB_MAXDEPTH], *sp = stk;
+
 	BUG_TRAP(tree->rb_node);
 	sp->root = tree->rb_node;
 	sp->pptr = pptr;
 	sp->pid = pid;
 
 	for (i = 0; i < 65535; i++) {
-		HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
-		
-		if (!*sp->pptr && *sp->pid) { 
+		if (!*sp->pptr && *sp->pid) {
 			/* ptr was invalidated but id is valid - try to recover 
 			   the original or next ptr */
-			*sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+			*sp->pptr =
+			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 		}
-		*sp->pid = 0; /* ptr is valid now so that remove this hint as it
-			         can become out of date quickly */
-		if (!*sp->pptr) { /* we are at right end; rewind & go up */
+		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
+				   can become out of date quickly */
+		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 			*sp->pptr = sp->root;
-			while ((*sp->pptr)->rb_left) 
+			while ((*sp->pptr)->rb_left)
 				*sp->pptr = (*sp->pptr)->rb_left;
 			if (sp > stk) {
 				sp--;
-				BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
-				htb_next_rb_node (sp->pptr);
+				BUG_TRAP(*sp->pptr);
+				if (!*sp->pptr)
+					return NULL;
+				htb_next_rb_node(sp->pptr);
 			}
 		} else {
 			struct htb_class *cl;
-			cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
-			HTB_CHCL(cl);
-			if (!cl->level) 
+			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+			if (!cl->level)
 				return cl;
 			(++sp)->root = cl->un.inner.feed[prio].rb_node;
-			sp->pptr = cl->un.inner.ptr+prio;
-			sp->pid = cl->un.inner.last_ptr_id+prio;
+			sp->pptr = cl->un.inner.ptr + prio;
+			sp->pid = cl->un.inner.last_ptr_id + prio;
 		}
 	}
 	BUG_TRAP(0);
@@ -1016,21 +885,21 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root 
 
 /* dequeues packet at given priority and level; call only if
    you are sure that there is active class at prio/level */
-static struct sk_buff *
-htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+					int level)
 {
 	struct sk_buff *skb = NULL;
-	struct htb_class *cl,*start;
+	struct htb_class *cl, *start;
 	/* look initial class up in the row */
-	start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
-			q->ptr[level]+prio,q->last_ptr_id[level]+prio);
-	
+	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
+
 	do {
 next:
-		BUG_TRAP(cl); 
-		if (!cl) return NULL;
-		HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
-				prio,level,cl->classid,cl->un.leaf.deficit[level]);
+		BUG_TRAP(cl);
+		if (!cl)
+			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
 		   qdisc drops packets in enqueue routine or if someone used
@@ -1038,64 +907,69 @@ next:
 		   simply deactivate and skip such class */
 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
-			htb_deactivate(q,cl);
+			htb_deactivate(q, cl);
 
 			/* row/level might become empty */
 			if ((q->row_mask[level] & (1 << prio)) == 0)
-				return NULL; 
-			
-			next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
-					prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+				return NULL;
+
+			next = htb_lookup_leaf(q->row[level] + prio,
+					       prio, q->ptr[level] + prio,
+					       q->last_ptr_id[level] + prio);
 
-			if (cl == start) /* fix start if we just deleted it */
+			if (cl == start)	/* fix start if we just deleted it */
 				start = next;
 			cl = next;
 			goto next;
 		}
-	
-		if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 
+
+		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+		if (likely(skb != NULL))
 			break;
 		if (!cl->warned) {
-			printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
+			printk(KERN_WARNING
+			       "htb: class %X isn't work conserving ?!\n",
+			       cl->classid);
 			cl->warned = 1;
 		}
 		q->nwc_hit++;
-		htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
-		cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
-				q->last_ptr_id[level]+prio);
+		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+				  ptr[0]) + prio);
+		cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
 
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
 		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
-			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
-				level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
-			htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+					  ptr[0]) + prio);
 		}
 		/* this used to be after charge_class but this constelation
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
-			htb_deactivate (q,cl);
-		htb_charge_class (q,cl,level,skb->len);
+			htb_deactivate(q, cl);
+		htb_charge_class(q, cl, level, skb->len);
 	}
 	return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch,long delay)
+static void htb_delay_by(struct Qdisc *sch, long delay)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	if (delay <= 0) delay = 1;
-	if (unlikely(delay > 5*HZ)) {
+	if (delay <= 0)
+		delay = 1;
+	if (unlikely(delay > 5 * HZ)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-		delay = 5*HZ;
+		delay = 5 * HZ;
 	}
 	/* why don't use jiffies here ? because expires can be in past */
 	mod_timer(&q->timer, q->jiffies + delay);
 	sch->flags |= TCQ_F_THROTTLED;
 	sch->qstats.overlimits++;
-	HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
 }
 
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
@@ -1104,22 +978,19 @@ static struct sk_buff *htb_dequeue(struc
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
 	long min_delay;
-#ifdef HTB_DEBUG
-	int evs_used = 0;
-#endif
 
 	q->jiffies = jiffies;
-	HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
-			sch->q.qlen);
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
-	if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+	skb = __skb_dequeue(&q->direct_queue);
+	if (skb != NULL) {
 		sch->flags &= ~TCQ_F_THROTTLED;
 		sch->q.qlen--;
 		return skb;
 	}
 
-	if (!sch->q.qlen) goto fin;
+	if (!sch->q.qlen)
+		goto fin;
 	PSCHED_GET_TIME(q->now);
 
 	min_delay = LONG_MAX;
@@ -1129,21 +1000,19 @@ static struct sk_buff *htb_dequeue(struc
 		int m;
 		long delay;
 		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-			delay = htb_do_events(q,level);
-			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
-#ifdef HTB_DEBUG
-			evs_used++;
-#endif
+			delay = htb_do_events(q, level);
+			q->near_ev_cache[level] =
+			    q->jiffies + (delay ? delay : HZ);
 		} else
-			delay = q->near_ev_cache[level] - q->jiffies;	
-		
-		if (delay && min_delay > delay) 
+			delay = q->near_ev_cache[level] - q->jiffies;
+
+		if (delay && min_delay > delay)
 			min_delay = delay;
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
-			int prio = ffz (m);
+			int prio = ffz(m);
 			m |= 1 << prio;
-			skb = htb_dequeue_tree(q,prio,level);
+			skb = htb_dequeue_tree(q, prio, level);
 			if (likely(skb != NULL)) {
 				sch->q.qlen--;
 				sch->flags &= ~TCQ_F_THROTTLED;
@@ -1151,40 +1020,28 @@ static struct sk_buff *htb_dequeue(struc
 			}
 		}
 	}
-#ifdef HTB_DEBUG
-	if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
-		if (min_delay == LONG_MAX) {
-			printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
-					evs_used,q->jiffies,jiffies);
-			htb_debug_dump(q);
-		} else 
-			printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
-					"too small rate\n",min_delay);
-	}
-#endif
-	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
 fin:
-	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
 	return skb;
 }
 
 /* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int prio;
 
 	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 		struct list_head *p;
-		list_for_each (p,q->drops+prio) {
+		list_for_each(p, q->drops + prio) {
 			struct htb_class *cl = list_entry(p, struct htb_class,
 							  un.leaf.drop_list);
 			unsigned int len;
-			if (cl->un.leaf.q->ops->drop && 
-				(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+			if (cl->un.leaf.q->ops->drop &&
+			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 				sch->q.qlen--;
 				if (!cl->un.leaf.q->q.qlen)
-					htb_deactivate (q,cl);
+					htb_deactivate(q, cl);
 				return len;
 			}
 		}
@@ -1194,29 +1051,25 @@ static unsigned int htb_drop(struct Qdis
 
 /* reset all classes */
 /* always caled under BH & queue lock */
-static void htb_reset(struct Qdisc* sch)
+static void htb_reset(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
-	HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (cl->level)
-				memset(&cl->un.inner,0,sizeof(cl->un.inner));
+				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
-				if (cl->un.leaf.q) 
+				if (cl->un.leaf.q)
 					qdisc_reset(cl->un.leaf.q);
 				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
-#ifdef HTB_DEBUG
-			cl->pq_node.rb_color = -1;
-			memset(cl->node,255,sizeof(cl->node));
-#endif
 
 		}
 	}
@@ -1224,12 +1077,12 @@ static void htb_reset(struct Qdisc* sch)
 	del_timer(&q->timer);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
-	memset(q->row,0,sizeof(q->row));
-	memset(q->row_mask,0,sizeof(q->row_mask));
-	memset(q->wait_pq,0,sizeof(q->wait_pq));
-	memset(q->ptr,0,sizeof(q->ptr));
+	memset(q->row, 0, sizeof(q->row));
+	memset(q->row_mask, 0, sizeof(q->row_mask));
+	memset(q->wait_pq, 0, sizeof(q->wait_pq));
+	memset(q->ptr, 0, sizeof(q->ptr));
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 }
 
 static int htb_init(struct Qdisc *sch, struct rtattr *opt)
@@ -1238,36 +1091,31 @@ static int htb_init(struct Qdisc *sch, s
 	struct rtattr *tb[TCA_HTB_INIT];
 	struct tc_htb_glob *gopt;
 	int i;
-#ifdef HTB_DEBUG
-	printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
-			  HTB_VER >> 16,HTB_VER & 0xffff);
-#endif
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
-			tb[TCA_HTB_INIT-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+	    tb[TCA_HTB_INIT - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
-	gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
 	if (gopt->version != HTB_VER >> 16) {
-		printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
-				HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+		printk(KERN_ERR
+		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 		return -EINVAL;
 	}
-	q->debug = gopt->debug;
-	HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
 
 	INIT_LIST_HEAD(&q->root);
 	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_LIST_HEAD(q->hash+i);
+		INIT_HLIST_HEAD(q->hash + i);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 
 	init_timer(&q->timer);
 	skb_queue_head_init(&q->direct_queue);
 
 	q->direct_qlen = sch->dev->tx_queue_len;
-	if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
 	q->timer.function = htb_timer;
 	q->timer.data = (unsigned long)sch;
@@ -1289,80 +1137,72 @@ static int htb_init(struct Qdisc *sch, s
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
-	HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	gopt.direct_pkts = q->direct_pkts;
 
-#ifdef HTB_DEBUG
-	if (HTB_DBG_COND(0,2))
-		htb_debug_dump(q);
-#endif
 	gopt.version = HTB_VER;
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
-	gopt.debug = q->debug;
-	rta = (struct rtattr*)b;
+	gopt.debug = 0;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, skb->tail - skb->data);
 	return -1;
 }
 
 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
-	struct sk_buff *skb, struct tcmsg *tcm)
+			  struct sk_buff *skb, struct tcmsg *tcm)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = (struct htb_class*)arg;
-	unsigned char	 *b = skb->tail;
+	struct htb_class *cl = (struct htb_class *)arg;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
-	HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
-
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
-	rta = (struct rtattr*)b;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
-	memset (&opt,0,sizeof(opt));
+	memset(&opt, 0, sizeof(opt));
 
-	opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
-	opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
-	opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
-	opt.level = cl->level; 
+	opt.rate = cl->rate->rate;
+	opt.buffer = cl->buffer;
+	opt.ceil = cl->ceil->rate;
+	opt.cbuffer = cl->cbuffer;
+	opt.quantum = cl->un.leaf.quantum;
+	opt.prio = cl->un.leaf.prio;
+	opt.level = cl->level;
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static int
-htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-	struct gnet_dump *d)
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1379,21 +1219,22 @@ htb_dump_class_stats(struct Qdisc *sch, 
 }
 
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-	struct Qdisc **old)
+		     struct Qdisc **old)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (cl && !cl->level) {
-		if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
-					&pfifo_qdisc_ops)) == NULL)
-					return -ENOBUFS;
+		if (new == NULL && (new = qdisc_create_dflt(sch->dev,
+							    &pfifo_qdisc_ops))
+		    == NULL)
+			return -ENOBUFS;
 		sch_tree_lock(sch);
 		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
 			if (cl->prio_activity)
-				htb_deactivate (qdisc_priv(sch),cl);
+				htb_deactivate(qdisc_priv(sch), cl);
 
 			/* TODO: is it correct ? Why CBQ doesn't do it ? */
-			sch->q.qlen -= (*old)->q.qlen;	
+			sch->q.qlen -= (*old)->q.qlen;
 			qdisc_reset(*old);
 		}
 		sch_tree_unlock(sch);
@@ -1402,20 +1243,16 @@ static int htb_graft(struct Qdisc *sch, 
 	return -ENOENT;
 }
 
-static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
 }
 
 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = htb_find(classid,sch);
-	HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
-	if (cl) 
+	struct htb_class *cl = htb_find(classid, sch);
+	if (cl)
 		cl->refcnt++;
 	return (unsigned long)cl;
 }
@@ -1430,10 +1267,9 @@ static void htb_destroy_filters(struct t
 	}
 }
 
-static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
 		sch->q.qlen -= cl->un.leaf.q->q.qlen;
@@ -1441,45 +1277,45 @@ static void htb_destroy_class(struct Qdi
 	}
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
-	
-	htb_destroy_filters (&cl->filter_list);
-	
-	while (!list_empty(&cl->children)) 
-		htb_destroy_class (sch,list_entry(cl->children.next,
-					struct htb_class,sibling));
+
+	htb_destroy_filters(&cl->filter_list);
+
+	while (!list_empty(&cl->children))
+		htb_destroy_class(sch, list_entry(cl->children.next,
+						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
-	list_del(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del(&cl->hlist);
 	list_del(&cl->sibling);
-	
+
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
-	
+		htb_deactivate(q, cl);
+
 	if (cl->cmode != HTB_CAN_SEND)
-		htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
-	
+		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
 	kfree(cl);
 }
 
 /* always caled under BH & queue lock */
-static void htb_destroy(struct Qdisc* sch)
+static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destroy q=%p\n",q);
 
-	del_timer_sync (&q->timer);
+	del_timer_sync(&q->timer);
 #ifdef HTB_RATECM
-	del_timer_sync (&q->rttim);
+	del_timer_sync(&q->rttim);
 #endif
 	/* This line used to be after htb_destroy_class call below
 	   and surprisingly it worked in 2.4. But it must precede it 
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
 	htb_destroy_filters(&q->filter_list);
-	
-	while (!list_empty(&q->root)) 
-		htb_destroy_class (sch,list_entry(q->root.next,
-					struct htb_class,sibling));
+
+	while (!list_empty(&q->root))
+		htb_destroy_class(sch, list_entry(q->root.next,
+						  struct htb_class, sibling));
 
 	__skb_queue_purge(&q->direct_queue);
 }
@@ -1487,24 +1323,25 @@ static void htb_destroy(struct Qdisc* sc
 static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	// TODO: why don't allow to delete subtree ? references ? does
 	// tc subsys quarantee us that in htb_destroy it holds no class
 	// refs so that we can remove children safely there ?
 	if (!list_empty(&cl->children) || cl->filter_cnt)
 		return -EBUSY;
-	
+
 	sch_tree_lock(sch);
-	
+
 	/* delete from hash and active; remainder in destroy_class */
-	list_del_init(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del(&cl->hlist);
+
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
+		htb_deactivate(q, cl);
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 
 	sch_tree_unlock(sch);
 	return 0;
@@ -1512,45 +1349,46 @@ static int htb_delete(struct Qdisc *sch,
 
 static void htb_put(struct Qdisc *sch, unsigned long arg)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 }
 
-static int htb_change_class(struct Qdisc *sch, u32 classid, 
-		u32 parentid, struct rtattr **tca, unsigned long *arg)
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+			    u32 parentid, struct rtattr **tca,
+			    unsigned long *arg)
 {
 	int err = -EINVAL;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)*arg,*parent;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct htb_class *cl = (struct htb_class *)*arg, *parent;
+	struct rtattr *opt = tca[TCA_OPTIONS - 1];
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
-			tb[TCA_HTB_PARMS-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+	    tb[TCA_HTB_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
 		goto failure;
-	
-	parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
 
-	hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
-	HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
-	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
-	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
-	if (!rtab || !ctab) goto failure;
+	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
+
+	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
+
+	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
+	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+	if (!rtab || !ctab)
+		goto failure;
 
-	if (!cl) { /* new class */
+	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
+		int prio;
+
 		/* check for valid classid */
-		if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+		if (!classid || TC_H_MAJ(classid ^ sch->handle)
+		    || htb_find(classid, sch))
 			goto failure;
 
 		/* check maximal depth */
@@ -1561,15 +1399,16 @@ static int htb_change_class(struct Qdisc
 		err = -ENOBUFS;
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
-		
+
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_LIST_HEAD(&cl->hlist);
+		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-#ifdef HTB_DEBUG
-		cl->magic = HTB_CMAGIC;
-#endif
+		RB_CLEAR_NODE(&cl->pq_node);
+
+		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+			RB_CLEAR_NODE(&cl->node[prio]);
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
@@ -1579,53 +1418,53 @@ static int htb_change_class(struct Qdisc
 		if (parent && !parent->level) {
 			/* turn parent into inner node */
 			sch->q.qlen -= parent->un.leaf.q->q.qlen;
-			qdisc_destroy (parent->un.leaf.q);
-			if (parent->prio_activity) 
-				htb_deactivate (q,parent);
+			qdisc_destroy(parent->un.leaf.q);
+			if (parent->prio_activity)
+				htb_deactivate(q, parent);
 
 			/* remove from evt list because of level change */
 			if (parent->cmode != HTB_CAN_SEND) {
-				htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
 				parent->cmode = HTB_CAN_SEND;
 			}
 			parent->level = (parent->parent ? parent->parent->level
-					: TC_HTB_MAXDEPTH) - 1;
-			memset (&parent->un.inner,0,sizeof(parent->un.inner));
+					 : TC_HTB_MAXDEPTH) - 1;
+			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
 		}
 		/* leaf (we) needs elementary qdisc */
 		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-		cl->classid = classid; cl->parent = parent;
+		cl->classid = classid;
+		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
-		cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */
+		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
-		list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
-		list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-#ifdef HTB_DEBUG
-		{ 
-			int i;
-			for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
-			cl->pq_node.rb_color = -1;
-		}
-#endif
-	} else sch_tree_lock(sch);
+		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+		list_add_tail(&cl->sibling,
+			      parent ? &parent->children : &q->root);
+	} else
+		sch_tree_lock(sch);
 
 	/* it used to be a nasty bug here, we have to check that node
-           is really leaf before changing cl->un.leaf ! */
+	   is really leaf before changing cl->un.leaf ! */
 	if (!cl->level) {
 		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is small. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 1000;
 		}
 		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is big. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1636,16 +1475,22 @@ static int htb_change_class(struct Qdisc
 
 	cl->buffer = hopt->buffer;
 	cl->cbuffer = hopt->cbuffer;
-	if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
-	if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+	if (cl->rate)
+		qdisc_put_rtab(cl->rate);
+	cl->rate = rtab;
+	if (cl->ceil)
+		qdisc_put_rtab(cl->ceil);
+	cl->ceil = ctab;
 	sch_tree_unlock(sch);
 
 	*arg = (unsigned long)cl;
 	return 0;
 
 failure:
-	if (rtab) qdisc_put_rtab(rtab);
-	if (ctab) qdisc_put_rtab(ctab);
+	if (rtab)
+		qdisc_put_rtab(rtab);
+	if (ctab)
+		qdisc_put_rtab(ctab);
 	return err;
 }
 
@@ -1654,28 +1499,28 @@ static struct tcf_proto **htb_find_tcf(s
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
-	HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+
 	return fl;
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
-	u32 classid)
+				     u32 classid)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_find (classid,sch);
-	HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+	struct htb_class *cl = htb_find(classid, sch);
+
 	/*if (cl && !cl->level) return 0;
-	  The line above used to be there to prevent attaching filters to 
-	  leaves. But at least tc_index filter uses this just to get class 
-	  for other reasons so that we have to allow for it.
-	  ----
-	  19.6.2002 As Werner explained it is ok - bind filter is just
-	  another way to "lock" the class - unlike "get" this lock can
-	  be broken by class during destroy IIUC.
+	   The line above used to be there to prevent attaching filters to
+	   leaves. But at least tc_index filter uses this just to get class
+	   for other reasons so that we have to allow for it.
+	   ----
+	   19.6.2002 As Werner explained it is ok - bind filter is just
+	   another way to "lock" the class - unlike "get" this lock can
+	   be broken by class during destroy IIUC.
 	 */
-	if (cl) 
-		cl->filter_cnt++; 
-	else 
+	if (cl)
+		cl->filter_cnt++;
+	else
 		q->filter_cnt++;
 	return (unsigned long)cl;
 }
@@ -1684,10 +1529,10 @@ static void htb_unbind_filter(struct Qdi
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
-	if (cl) 
-		cl->filter_cnt--; 
-	else 
+
+	if (cl)
+		cl->filter_cnt--;
+	else
 		q->filter_cnt--;
 }
 
@@ -1700,9 +1545,10 @@ static void htb_walk(struct Qdisc *sch, 
 		return;
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -1750,12 +1596,13 @@ static struct Qdisc_ops htb_qdisc_ops = 
 
 static int __init htb_module_init(void)
 {
-    return register_qdisc(&htb_qdisc_ops);
+	return register_qdisc(&htb_qdisc_ops);
 }
-static void __exit htb_module_exit(void) 
+static void __exit htb_module_exit(void)
 {
-    unregister_qdisc(&htb_qdisc_ops);
+	unregister_qdisc(&htb_qdisc_ops);
 }
+
 module_init(htb_module_init)
 module_exit(htb_module_exit)
 MODULE_LICENSE("GPL");
diff -puN net/sched/sch_netem.c~git-net net/sched/sch_netem.c
--- a/net/sched/sch_netem.c~git-net
+++ a/net/sched/sch_netem.c
@@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff 
 	 */
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
-		    || (skb->ip_summed == CHECKSUM_HW
-			&& skb_checksum_help(skb, 0))) {
+		    || (skb->ip_summed == CHECKSUM_PARTIAL
+			&& skb_checksum_help(skb))) {
 			sch->qstats.drops++;
 			return NET_XMIT_DROP;
 		}
diff -puN net/socket.c~git-net net/socket.c
--- a/net/socket.c~git-net
+++ a/net/socket.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Some small cleanups, optimizations,
  *					and fixed a copy_from_user() bug.
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
- *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
+ *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  *					protocol-independent
  *
  *
@@ -53,17 +53,17 @@
  *
  *
  *	This module is effectively the top level interface to the BSD socket
- *	paradigm. 
+ *	paradigm.
  *
  *	Based upon Swansea University Computer Society NET3.039
  */
 
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -96,25 +96,24 @@
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
-			 size_t size, loff_t pos);
+			     size_t size, loff_t pos);
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
-			  size_t size, loff_t pos);
-static int sock_mmap(struct file *file, struct vm_area_struct * vma);
+			      size_t size, loff_t pos);
+static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 
 static int sock_close(struct inode *inode, struct file *file);
 static unsigned int sock_poll(struct file *file,
 			      struct poll_table_struct *wait);
-static long sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+			      unsigned int cmd, unsigned long arg);
 #endif
 static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
-			  unsigned long count, loff_t *ppos);
+			   unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
 
@@ -147,52 +146,8 @@ static struct file_operations socket_fil
  *	The protocol list. Each protocol is registered in here.
  */
 
-static struct net_proto_family *net_families[NPROTO];
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t net_family_lockct = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(net_family_lock);
-
-/* The strategy is: modifications net_family vector are short, do not
-   sleep and veeery rare, but read access should be free of any exclusive
-   locks.
- */
-
-static void net_family_write_lock(void)
-{
-	spin_lock(&net_family_lock);
-	while (atomic_read(&net_family_lockct) != 0) {
-		spin_unlock(&net_family_lock);
-
-		yield();
-
-		spin_lock(&net_family_lock);
-	}
-}
-
-static __inline__ void net_family_write_unlock(void)
-{
-	spin_unlock(&net_family_lock);
-}
-
-static __inline__ void net_family_read_lock(void)
-{
-	atomic_inc(&net_family_lockct);
-	spin_unlock_wait(&net_family_lock);
-}
-
-static __inline__ void net_family_read_unlock(void)
-{
-	atomic_dec(&net_family_lockct);
-}
-
-#else
-#define net_family_write_lock() do { } while(0)
-#define net_family_write_unlock() do { } while(0)
-#define net_family_read_lock() do { } while(0)
-#define net_family_read_unlock() do { } while(0)
-#endif
-
+static const struct net_proto_family *net_families[NPROTO] __read_mostly;
 
 /*
  *	Statistics counters of the socket lists
@@ -201,19 +156,20 @@ static __inline__ void net_family_read_u
 static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 
 /*
- *	Support routines. Move socket addresses back and forth across the kernel/user
- *	divide and look after the messy bits.
+ * Support routines.
+ * Move socket addresses back and forth across the kernel/user
+ * divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
 					   16 for IP, 16 for IPX,
 					   24 for IPv6,
-					   about 80 for AX.25 
+					   about 80 for AX.25
 					   must be at least one bigger than
 					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).  
+					   :unix_mkname()).
 					 */
-					 
+
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
@@ -227,11 +183,11 @@ static DEFINE_PER_CPU(int, sockets_in_us
 
 int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
 {
-	if(ulen<0||ulen>MAX_SOCK_ADDR)
+	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(ulen==0)
+	if (ulen == 0)
 		return 0;
-	if(copy_from_user(kaddr,uaddr,ulen))
+	if (copy_from_user(kaddr, uaddr, ulen))
 		return -EFAULT;
 	return audit_sockaddr(ulen, kaddr);
 }
@@ -252,51 +208,52 @@ int move_addr_to_kernel(void __user *uad
  *	length of the data is written over the length limit the user
  *	specified. Zero is returned for a success.
  */
- 
-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
+
+int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+		      int __user *ulen)
 {
 	int err;
 	int len;
 
-	if((err=get_user(len, ulen)))
+	err = get_user(len, ulen);
+	if (err)
 		return err;
-	if(len>klen)
-		len=klen;
-	if(len<0 || len> MAX_SOCK_ADDR)
+	if (len > klen)
+		len = klen;
+	if (len < 0 || len > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(len)
-	{
+	if (len) {
 		if (audit_sockaddr(klen, kaddr))
 			return -ENOMEM;
-		if(copy_to_user(uaddr,kaddr,len))
+		if (copy_to_user(uaddr, kaddr, len))
 			return -EFAULT;
 	}
 	/*
-	 *	"fromlen shall refer to the value before truncation.."
-	 *			1003.1g
+	 *      "fromlen shall refer to the value before truncation.."
+	 *                      1003.1g
 	 */
 	return __put_user(klen, ulen);
 }
 
 #define SOCKFS_MAGIC 0x534F434B
 
-static kmem_cache_t * sock_inode_cachep __read_mostly;
+static kmem_cache_t *sock_inode_cachep __read_mostly;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
-	ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+
+	ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	init_waitqueue_head(&ei->socket.wait);
-	
+
 	ei->socket.fasync_list = NULL;
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
 	ei->socket.ops = NULL;
 	ei->socket.sk = NULL;
 	ei->socket.file = NULL;
-	ei->socket.flags = 0;
 
 	return &ei->vfs_inode;
 }
@@ -307,22 +264,25 @@ static void sock_destroy_inode(struct in
 			container_of(inode, struct socket_alloc, vfs_inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	struct socket_alloc *ei = (struct socket_alloc *) foo;
+	struct socket_alloc *ei = (struct socket_alloc *)foo;
 
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
+	    == SLAB_CTOR_CONSTRUCTOR)
 		inode_init_once(&ei->vfs_inode);
 }
- 
+
 static int init_inodecache(void)
 {
 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
-				sizeof(struct socket_alloc),
-				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
-					SLAB_MEM_SPREAD),
-				init_once, NULL);
+					      sizeof(struct socket_alloc),
+					      0,
+					      (SLAB_HWCACHE_ALIGN |
+					       SLAB_RECLAIM_ACCOUNT |
+					       SLAB_MEM_SPREAD),
+					      init_once,
+					      NULL);
 	if (sock_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
@@ -335,7 +295,8 @@ static struct super_operations sockfs_op
 };
 
 static int sockfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+			 int flags, const char *dev_name, void *data,
+			 struct vfsmount *mnt)
 {
 	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
 			     mnt);
@@ -348,12 +309,13 @@ static struct file_system_type sock_fs_t
 	.get_sb =	sockfs_get_sb,
 	.kill_sb =	kill_anon_super,
 };
+
 static int sockfs_delete_dentry(struct dentry *dentry)
 {
 	return 1;
 }
 static struct dentry_operations sockfs_dentry_operations = {
-	.d_delete =	sockfs_delete_dentry,
+	.d_delete = sockfs_delete_dentry,
 };
 
 /*
@@ -477,10 +439,12 @@ struct socket *sockfd_lookup(int fd, int
 	struct file *file;
 	struct socket *sock;
 
-	if (!(file = fget(fd))) {
+	file = fget(fd);
+	if (!file) {
 		*err = -EBADF;
 		return NULL;
 	}
+
 	sock = sock_from_file(file, err);
 	if (!sock)
 		fput(file);
@@ -505,7 +469,7 @@ static struct socket *sockfd_lookup_ligh
 
 /**
  *	sock_alloc	-	allocate a socket
- *	
+ *
  *	Allocate a new inode and socket object. The two are bound together
  *	and initialised. The socket is then returned. If we are out of inodes
  *	NULL is returned.
@@ -513,8 +477,8 @@ static struct socket *sockfd_lookup_ligh
 
 static struct socket *sock_alloc(void)
 {
-	struct inode * inode;
-	struct socket * sock;
+	struct inode *inode;
+	struct socket *sock;
 
 	inode = new_inode(sock_mnt->mnt_sb);
 	if (!inode)
@@ -522,7 +486,7 @@ static struct socket *sock_alloc(void)
 
 	sock = SOCKET_I(inode);
 
-	inode->i_mode = S_IFSOCK|S_IRWXUGO;
+	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
@@ -536,7 +500,7 @@ static struct socket *sock_alloc(void)
  *	a back door. Remember to keep it shut otherwise you'll let the
  *	creepy crawlies in.
  */
-  
+
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 {
 	return -ENXIO;
@@ -553,9 +517,9 @@ const struct file_operations bad_sock_fo
  *
  *	The socket is released from the protocol stack if it has a release
  *	callback, and the inode is then released if the socket is bound to
- *	an inode not a file. 
+ *	an inode not a file.
  */
- 
+
 void sock_release(struct socket *sock)
 {
 	if (sock->ops) {
@@ -575,10 +539,10 @@ void sock_release(struct socket *sock)
 		iput(SOCK_INODE(sock));
 		return;
 	}
-	sock->file=NULL;
+	sock->file = NULL;
 }
 
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size)
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
@@ -621,14 +585,14 @@ int kernel_sendmsg(struct socket *sock, 
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
+	msg->msg_iov = (struct iovec *)vec;
 	msg->msg_iovlen = num;
 	result = sock_sendmsg(sock, msg, size);
 	set_fs(oldfs);
 	return result;
 }
 
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
 	int err;
@@ -647,14 +611,14 @@ static inline int __sock_recvmsg(struct 
 	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 }
 
-int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
+int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
 {
 	struct kiocb iocb;
 	struct sock_iocb siocb;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 	if (-EIOCBQUEUED == ret)
@@ -662,9 +626,8 @@ int sock_recvmsg(struct socket *sock, st
 	return ret;
 }
 
-int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 
-		   struct kvec *vec, size_t num,
-		   size_t size, int flags)
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
+		   struct kvec *vec, size_t num, size_t size, int flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int result;
@@ -674,8 +637,7 @@ int kernel_recvmsg(struct socket *sock, 
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
-	msg->msg_iovlen = num;
+	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 	result = sock_recvmsg(sock, msg, size, flags);
 	set_fs(oldfs);
 	return result;
@@ -702,7 +664,8 @@ static ssize_t sock_sendpage(struct file
 }
 
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
-		char __user *ubuf, size_t size, struct sock_iocb *siocb)
+					 char __user *ubuf, size_t size,
+					 struct sock_iocb *siocb)
 {
 	if (!is_sync_kiocb(iocb)) {
 		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -720,20 +683,21 @@ static struct sock_iocb *alloc_sock_iocb
 }
 
 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			    struct file *file, struct iovec *iov,
+			    unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 
@@ -748,7 +712,7 @@ static ssize_t sock_readv(struct file *f
 	struct msghdr msg;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 
 	ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
@@ -758,7 +722,7 @@ static ssize_t sock_readv(struct file *f
 }
 
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-			 size_t count, loff_t pos)
+			     size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -771,24 +735,25 @@ static ssize_t sock_aio_read(struct kioc
 	if (!x)
 		return -ENOMEM;
 	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			    &x->async_iov, 1);
 }
 
 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			     struct file *file, struct iovec *iov,
+			     unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 	if (sock->type == SOCK_SEQPACKET)
@@ -815,7 +780,7 @@ static ssize_t sock_writev(struct file *
 }
 
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-			  size_t count, loff_t pos)
+			      size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -829,46 +794,48 @@ static ssize_t sock_aio_write(struct kio
 		return -ENOMEM;
 
 	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			     &x->async_iov, 1);
 }
 
-
 /*
  * Atomic setting of ioctl hooks to avoid race
  * with module unload.
  */
 
 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
 
-void brioctl_set(int (*hook)(unsigned int, void __user *))
+void brioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&br_ioctl_mutex);
 	br_ioctl_hook = hook;
 	mutex_unlock(&br_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(brioctl_set);
 
 static DEFINE_MUTEX(vlan_ioctl_mutex);
-static int (*vlan_ioctl_hook)(void __user *arg);
+static int (*vlan_ioctl_hook) (void __user *arg);
 
-void vlan_ioctl_set(int (*hook)(void __user *))
+void vlan_ioctl_set(int (*hook) (void __user *))
 {
 	mutex_lock(&vlan_ioctl_mutex);
 	vlan_ioctl_hook = hook;
 	mutex_unlock(&vlan_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(vlan_ioctl_set);
 
 static DEFINE_MUTEX(dlci_ioctl_mutex);
-static int (*dlci_ioctl_hook)(unsigned int, void __user *);
+static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 
-void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
+void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&dlci_ioctl_mutex);
 	dlci_ioctl_hook = hook;
 	mutex_unlock(&dlci_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 /*
@@ -890,8 +857,8 @@ static long sock_ioctl(struct file *file
 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 		err = dev_ioctl(cmd, argp);
 	} else
-#endif	/* CONFIG_WIRELESS_EXT */
-	switch (cmd) {
+#endif				/* CONFIG_WIRELESS_EXT */
+		switch (cmd) {
 		case FIOSETOWN:
 		case SIOCSPGRP:
 			err = -EFAULT;
@@ -901,7 +868,8 @@ static long sock_ioctl(struct file *file
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
-			err = put_user(sock->file->f_owner.pid, (int __user *)argp);
+			err = put_user(sock->file->f_owner.pid,
+				       (int __user *)argp);
 			break;
 		case SIOCGIFBR:
 		case SIOCSIFBR:
@@ -912,7 +880,7 @@ static long sock_ioctl(struct file *file
 				request_module("bridge");
 
 			mutex_lock(&br_ioctl_mutex);
-			if (br_ioctl_hook) 
+			if (br_ioctl_hook)
 				err = br_ioctl_hook(cmd, argp);
 			mutex_unlock(&br_ioctl_mutex);
 			break;
@@ -929,7 +897,7 @@ static long sock_ioctl(struct file *file
 			break;
 		case SIOCGIFDIVERT:
 		case SIOCSIFDIVERT:
-		/* Convert this to call through a hook */
+			/* Convert this to call through a hook */
 			err = divert_ioctl(cmd, argp);
 			break;
 		case SIOCADDDLCI:
@@ -954,7 +922,7 @@ static long sock_ioctl(struct file *file
 			if (err == -ENOIOCTLCMD)
 				err = dev_ioctl(cmd, argp);
 			break;
-	}
+		}
 	return err;
 }
 
@@ -962,7 +930,7 @@ int sock_create_lite(int family, int typ
 {
 	int err;
 	struct socket *sock = NULL;
-	
+
 	err = security_socket_create(family, type, protocol, 1);
 	if (err)
 		goto out;
@@ -973,26 +941,33 @@ int sock_create_lite(int family, int typ
 		goto out;
 	}
 
-	security_socket_post_create(sock, family, type, protocol, 1);
 	sock->type = type;
+	err = security_socket_post_create(sock, family, type, protocol, 1);
+	if (err)
+		goto out_release;
+
 out:
 	*res = sock;
 	return err;
+out_release:
+	sock_release(sock);
+	sock = NULL;
+	goto out;
 }
 
 /* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table * wait)
+static unsigned int sock_poll(struct file *file, poll_table *wait)
 {
 	struct socket *sock;
 
 	/*
-	 *	We can't return errors to poll, so it's either yes or no. 
+	 *      We can't return errors to poll, so it's either yes or no.
 	 */
 	sock = file->private_data;
 	return sock->ops->poll(file, sock, wait);
 }
 
-static int sock_mmap(struct file * file, struct vm_area_struct * vma)
+static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct socket *sock = file->private_data;
 
@@ -1002,12 +977,11 @@ static int sock_mmap(struct file * file,
 static int sock_close(struct inode *inode, struct file *filp)
 {
 	/*
-	 *	It was possible the inode is NULL we were 
-	 *	closing an unfinished socket. 
+	 *      It was possible the inode is NULL we were
+	 *      closing an unfinished socket.
 	 */
 
-	if (!inode)
-	{
+	if (!inode) {
 		printk(KERN_DEBUG "sock_close: NULL inode\n");
 		return 0;
 	}
@@ -1033,57 +1007,52 @@ static int sock_close(struct inode *inod
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna=NULL, **prev;
+	struct fasync_struct *fa, *fna = NULL, **prev;
 	struct socket *sock;
 	struct sock *sk;
 
-	if (on)
-	{
+	if (on) {
 		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if(fna==NULL)
+		if (fna == NULL)
 			return -ENOMEM;
 	}
 
 	sock = filp->private_data;
 
-	if ((sk=sock->sk) == NULL) {
+	sk = sock->sk;
+	if (sk == NULL) {
 		kfree(fna);
 		return -EINVAL;
 	}
 
 	lock_sock(sk);
 
-	prev=&(sock->fasync_list);
+	prev = &(sock->fasync_list);
 
-	for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
-		if (fa->fa_file==filp)
+	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
+		if (fa->fa_file == filp)
 			break;
 
-	if(on)
-	{
-		if(fa!=NULL)
-		{
+	if (on) {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd=fd;
+			fa->fa_fd = fd;
 			write_unlock_bh(&sk->sk_callback_lock);
 
 			kfree(fna);
 			goto out;
 		}
-		fna->fa_file=filp;
-		fna->fa_fd=fd;
-		fna->magic=FASYNC_MAGIC;
-		fna->fa_next=sock->fasync_list;
+		fna->fa_file = filp;
+		fna->fa_fd = fd;
+		fna->magic = FASYNC_MAGIC;
+		fna->fa_next = sock->fasync_list;
 		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list=fna;
+		sock->fasync_list = fna;
 		write_unlock_bh(&sk->sk_callback_lock);
-	}
-	else
-	{
-		if (fa!=NULL)
-		{
+	} else {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			*prev=fa->fa_next;
+			*prev = fa->fa_next;
 			write_unlock_bh(&sk->sk_callback_lock);
 			kfree(fa);
 		}
@@ -1100,10 +1069,9 @@ int sock_wake_async(struct socket *sock,
 {
 	if (!sock || !sock->fasync_list)
 		return -1;
-	switch (how)
-	{
+	switch (how) {
 	case 1:
-		
+
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			break;
 		goto call_kill;
@@ -1112,7 +1080,7 @@ int sock_wake_async(struct socket *sock,
 			break;
 		/* fall through */
 	case 0:
-	call_kill:
+call_kill:
 		__kill_fasync(sock->fasync_list, SIGIO, band);
 		break;
 	case 3:
@@ -1121,13 +1089,15 @@ int sock_wake_async(struct socket *sock,
 	return 0;
 }
 
-static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+static int __sock_create(int family, int type, int protocol,
+			 struct socket **res, int kern)
 {
 	int err;
 	struct socket *sock;
+	const struct net_proto_family *pf;
 
 	/*
-	 *	Check protocol is in range
+	 *      Check protocol is in range
 	 */
 	if (family < 0 || family >= NPROTO)
 		return -EAFNOSUPPORT;
@@ -1140,10 +1110,11 @@ static int __sock_create(int family, int
 	   deadlock in module load.
 	 */
 	if (family == PF_INET && type == SOCK_PACKET) {
-		static int warned; 
+		static int warned;
 		if (!warned) {
 			warned = 1;
-			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
+			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+			       current->comm);
 		}
 		family = PF_PACKET;
 	}
@@ -1151,78 +1122,83 @@ static int __sock_create(int family, int
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
-		
+
+	/*
+	 *	Allocate the socket and allow the family to set things up. if
+	 *	the protocol is 0, the family is instructed to select an appropriate
+	 *	default.
+	 */
+	sock = sock_alloc();
+	if (!sock) {
+		printk(KERN_WARNING "socket: no more sockets\n");
+		return -ENFILE;	/* Not exactly a match, but its the
+				   closest posix thing */
+	}
+
+	sock->type = type;
+
 #if defined(CONFIG_KMOD)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
+	/* Attempt to load a protocol module if the find failed.
+	 *
+	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 	 * requested real, full-featured networking support upon configuration.
 	 * Otherwise module support will break!
 	 */
-	if (net_families[family]==NULL)
-	{
-		request_module("net-pf-%d",family);
-	}
+	if (net_families[family] == NULL)
+		request_module("net-pf-%d", family);
 #endif
 
-	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		err = -EAFNOSUPPORT;
-		goto out;
-	}
-
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
-
-	if (!(sock = sock_alloc())) {
-		printk(KERN_WARNING "socket: no more sockets\n");
-		err = -ENFILE;		/* Not exactly a match, but its the
-					   closest posix thing */
-		goto out;
-	}
-
-	sock->type  = type;
+	rcu_read_lock();
+	pf = rcu_dereference(net_families[family]);
+	err = -EAFNOSUPPORT;
+	if (!pf)
+		goto out_release;
 
 	/*
 	 * We will call the ->create function, that possibly is in a loadable
 	 * module, so we have to bump that loadable module refcnt first.
 	 */
-	err = -EAFNOSUPPORT;
-	if (!try_module_get(net_families[family]->owner))
+	if (!try_module_get(pf->owner))
 		goto out_release;
 
-	if ((err = net_families[family]->create(sock, protocol)) < 0) {
-		sock->ops = NULL;
+	/* Now protected by module ref count */
+	rcu_read_unlock();
+
+	err = pf->create(sock, protocol);
+	if (err < 0)
 		goto out_module_put;
-	}
 
 	/*
 	 * Now to bump the refcnt of the [loadable] module that owns this
 	 * socket at sock_release time we decrement its refcnt.
 	 */
-	if (!try_module_get(sock->ops->owner)) {
-		sock->ops = NULL;
-		goto out_module_put;
-	}
+	if (!try_module_get(sock->ops->owner))
+		goto out_module_busy;
+
 	/*
 	 * Now that we're done with the ->create function, the [loadable]
 	 * module can have its refcnt decremented
 	 */
-	module_put(net_families[family]->owner);
+	module_put(pf->owner);
+	err = security_socket_post_create(sock, family, type, protocol, kern);
+	if (err)
+		goto out_release;
 	*res = sock;
-	security_socket_post_create(sock, family, type, protocol, kern);
 
-out:
-	net_family_read_unlock();
-	return err;
+	return 0;
+
+out_module_busy:
+	err = -EAFNOSUPPORT;
 out_module_put:
-	module_put(net_families[family]->owner);
-out_release:
+	sock->ops = NULL;
+	module_put(pf->owner);
+out_sock_release:
 	sock_release(sock);
-	goto out;
+	return err;
+
+out_release:
+	rcu_read_unlock();
+	goto out_sock_release;
 }
 
 int sock_create(int family, int type, int protocol, struct socket **res)
@@ -1261,7 +1237,8 @@ out_release:
  *	Create a pair of connected sockets.
  */
 
-asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
+asmlinkage long sys_socketpair(int family, int type, int protocol,
+			       int __user *usockvec)
 {
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
@@ -1280,7 +1257,7 @@ asmlinkage long sys_socketpair(int famil
 		goto out_release_1;
 
 	err = sock1->ops->socketpair(sock1, sock2);
-	if (err < 0) 
+	if (err < 0)
 		goto out_release_both;
 
 	fd1 = fd2 = -1;
@@ -1299,7 +1276,7 @@ asmlinkage long sys_socketpair(int famil
 	 * Not kernel problem.
 	 */
 
-	err = put_user(fd1, &usockvec[0]); 
+	err = put_user(fd1, &usockvec[0]);
 	if (!err)
 		err = put_user(fd2, &usockvec[1]);
 	if (!err)
@@ -1310,19 +1287,18 @@ asmlinkage long sys_socketpair(int famil
 	return err;
 
 out_close_1:
-        sock_release(sock2);
+	sock_release(sock2);
 	sys_close(fd1);
 	return err;
 
 out_release_both:
-        sock_release(sock2);
+	sock_release(sock2);
 out_release_1:
-        sock_release(sock1);
+	sock_release(sock1);
 out:
 	return err;
 }
 
-
 /*
  *	Bind a name to a socket. Nothing much to do here since it's
  *	the protocol's responsibility to handle the local address.
@@ -1337,20 +1313,23 @@ asmlinkage long sys_bind(int fd, struct 
 	char address[MAX_SOCK_ADDR];
 	int err, fput_needed;
 
-	if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
-		if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
-			err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if(sock) {
+		err = move_addr_to_kernel(umyaddr, addrlen, address);
+		if (err >= 0) {
+			err = security_socket_bind(sock,
+						   (struct sockaddr *)address,
+						   addrlen);
 			if (!err)
 				err = sock->ops->bind(sock,
-					(struct sockaddr *)address, addrlen);
+						      (struct sockaddr *)
+						      address, addrlen);
 		}
 		fput_light(sock->file, fput_needed);
-	}			
+	}
 	return err;
 }
 
-
 /*
  *	Perform a listen. Basically, we allow the protocol to do anything
  *	necessary for a listen, and if that works, we mark the socket as
@@ -1363,9 +1342,10 @@ asmlinkage long sys_listen(int fd, int b
 {
 	struct socket *sock;
 	int err, fput_needed;
-	
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
-		if ((unsigned) backlog > sysctl_somaxconn)
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock) {
+		if ((unsigned)backlog > sysctl_somaxconn)
 			backlog = sysctl_somaxconn;
 
 		err = security_socket_listen(sock, backlog);
@@ -1377,7 +1357,6 @@ asmlinkage long sys_listen(int fd, int b
 	return err;
 }
 
-
 /*
  *	For accept, we attempt to create a new socket, set up the link
  *	with the client, wake up the client, then return the new
@@ -1390,7 +1369,8 @@ asmlinkage long sys_listen(int fd, int b
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1402,7 +1382,7 @@ asmlinkage long sys_accept(int fd, struc
 		goto out;
 
 	err = -ENFILE;
-	if (!(newsock = sock_alloc())) 
+	if (!(newsock = sock_alloc()))
 		goto out_put;
 
 	newsock->type = sock->type;
@@ -1434,11 +1414,13 @@ asmlinkage long sys_accept(int fd, struc
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
+		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+					  &len, 2) < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
-		err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+		err = move_addr_to_user(address, len, upeer_sockaddr,
+					upeer_addrlen);
 		if (err < 0)
 			goto out_fd;
 	}
@@ -1460,7 +1442,6 @@ out_fd:
 	goto out_put;
 }
 
-
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -1473,7 +1454,8 @@ out_fd:
  *	include the -EINPROGRESS status for such sockets.
  */
 
-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+			    int addrlen)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1486,11 +1468,12 @@ asmlinkage long sys_connect(int fd, stru
 	if (err < 0)
 		goto out_put;
 
-	err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+	err =
+	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
 	if (err)
 		goto out_put;
 
-	err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
 				 sock->file->f_flags);
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1503,12 +1486,13 @@ out:
  *	name to user space.
  */
 
-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
-	
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1533,22 +1517,27 @@ out:
  *	name to user space.
  */
 
-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getpeername(sock);
 		if (err) {
 			fput_light(sock->file, fput_needed);
 			return err;
 		}
 
-		err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
+		err =
+		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
+				       1);
 		if (!err)
-			err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
+			err = move_addr_to_user(address, len, usockaddr,
+						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
@@ -1560,8 +1549,9 @@ asmlinkage long sys_getpeername(int fd, 
  *	the protocol.
  */
 
-asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
-			   struct sockaddr __user *addr, int addr_len)
+asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
+			   unsigned flags, struct sockaddr __user *addr,
+			   int addr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1578,54 +1568,55 @@ asmlinkage long sys_sendto(int fd, void 
 	sock = sock_from_file(sock_file, &err);
 	if (!sock)
 		goto out_put;
-	iov.iov_base=buff;
-	iov.iov_len=len;
-	msg.msg_name=NULL;
-	msg.msg_iov=&iov;
-	msg.msg_iovlen=1;
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_namelen=0;
+	iov.iov_base = buff;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
 	if (addr) {
 		err = move_addr_to_kernel(addr, addr_len, address);
 		if (err < 0)
 			goto out_put;
-		msg.msg_name=address;
-		msg.msg_namelen=addr_len;
+		msg.msg_name = address;
+		msg.msg_namelen = addr_len;
 	}
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	msg.msg_flags = flags;
 	err = sock_sendmsg(sock, &msg, len);
 
-out_put:		
+out_put:
 	fput_light(sock_file, fput_needed);
 	return err;
 }
 
 /*
- *	Send a datagram down a socket. 
+ *	Send a datagram down a socket.
  */
 
-asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
+asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
 {
 	return sys_sendto(fd, buff, len, flags, NULL, 0);
 }
 
 /*
- *	Receive a frame from the socket and optionally record the address of the 
+ *	Receive a frame from the socket and optionally record the address of the
  *	sender. We verify the buffers are writable and if needed move the
  *	sender address from kernel to user space.
  */
 
-asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
-			     struct sockaddr __user *addr, int __user *addr_len)
+asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
+			     unsigned flags, struct sockaddr __user *addr,
+			     int __user *addr_len)
 {
 	struct socket *sock;
 	struct iovec iov;
 	struct msghdr msg;
 	char address[MAX_SOCK_ADDR];
-	int err,err2;
+	int err, err2;
 	struct file *sock_file;
 	int fput_needed;
 
@@ -1637,23 +1628,22 @@ asmlinkage long sys_recvfrom(int fd, voi
 	if (!sock)
 		goto out;
 
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_iovlen=1;
-	msg.msg_iov=&iov;
-	iov.iov_len=size;
-	iov.iov_base=ubuf;
-	msg.msg_name=address;
-	msg.msg_namelen=MAX_SOCK_ADDR;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = &iov;
+	iov.iov_len = size;
+	iov.iov_base = ubuf;
+	msg.msg_name = address;
+	msg.msg_namelen = MAX_SOCK_ADDR;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	err=sock_recvmsg(sock, &msg, size, flags);
+	err = sock_recvmsg(sock, &msg, size, flags);
 
-	if(err >= 0 && addr != NULL)
-	{
-		err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
-		if(err2<0)
-			err=err2;
+	if (err >= 0 && addr != NULL) {
+		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+		if (err2 < 0)
+			err = err2;
 	}
 out:
 	fput_light(sock_file, fput_needed);
@@ -1661,10 +1651,11 @@ out:
 }
 
 /*
- *	Receive a datagram from a socket. 
+ *	Receive a datagram from a socket.
  */
 
-asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
+asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
+			 unsigned flags)
 {
 	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
 }
@@ -1674,24 +1665,29 @@ asmlinkage long sys_recv(int fd, void __
  *	to pass the user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
+asmlinkage long sys_setsockopt(int fd, int level, int optname,
+			       char __user *optval, int optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
 	if (optlen < 0)
 		return -EINVAL;
-			
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL)
-	{
-		err = security_socket_setsockopt(sock,level,optname);
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
+		err = security_socket_setsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_setsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_setsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->setsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
@@ -1703,27 +1699,32 @@ out_put:
  *	to pass a user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
+asmlinkage long sys_getsockopt(int fd, int level, int optname,
+			       char __user *optval, int __user *optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_getsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_getsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->getsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
 }
 
-
 /*
  *	Shutdown a socket.
  */
@@ -1733,8 +1734,8 @@ asmlinkage long sys_shutdown(int fd, int
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_shutdown(sock, how);
 		if (!err)
 			err = sock->ops->shutdown(sock, how);
@@ -1743,41 +1744,42 @@ asmlinkage long sys_shutdown(int fd, int
 	return err;
 }
 
-/* A couple of helpful macros for getting the address of the 32/64 bit 
+/* A couple of helpful macros for getting the address of the 32/64 bit
  * fields which are the same type (int / unsigned) on our platforms.
  */
 #define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
 #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
 #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
 
-
 /*
  *	BSD sendmsg interface
  */
 
 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
-			__attribute__ ((aligned (sizeof(__kernel_size_t))));
-			/* 20 is size of ipv6_pktinfo */
+	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
+	/* 20 is size of ipv6_pktinfo */
 	unsigned char *ctl_buf = ctl;
 	struct msghdr msg_sys;
 	int err, ctl_len, iov_size, total_len;
 	int fput_needed;
-	
+
 	err = -EFAULT;
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if (!sock) 
+	if (!sock)
 		goto out;
 
 	/* do not move before msg_sys is valid */
@@ -1785,7 +1787,7 @@ asmlinkage long sys_sendmsg(int fd, stru
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
 
-	/* Check whether to allocate the iovec area*/
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1799,7 +1801,7 @@ asmlinkage long sys_sendmsg(int fd, stru
 		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
 	} else
 		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
-	if (err < 0) 
+	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
 
@@ -1807,18 +1809,19 @@ asmlinkage long sys_sendmsg(int fd, stru
 
 	if (msg_sys.msg_controllen > INT_MAX)
 		goto out_freeiov;
-	ctl_len = msg_sys.msg_controllen; 
+	ctl_len = msg_sys.msg_controllen;
 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
-		err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl));
+		err =
+		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+						     sizeof(ctl));
 		if (err)
 			goto out_freeiov;
 		ctl_buf = msg_sys.msg_control;
 		ctl_len = msg_sys.msg_controllen;
 	} else if (ctl_len) {
-		if (ctl_len > sizeof(ctl))
-		{
+		if (ctl_len > sizeof(ctl)) {
 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
-			if (ctl_buf == NULL) 
+			if (ctl_buf == NULL)
 				goto out_freeiov;
 		}
 		err = -EFAULT;
@@ -1827,7 +1830,8 @@ asmlinkage long sys_sendmsg(int fd, stru
 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
 		 * checking falls down on this.
 		 */
-		if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
+		if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
+				   ctl_len))
 			goto out_freectl;
 		msg_sys.msg_control = ctl_buf;
 	}
@@ -1838,14 +1842,14 @@ asmlinkage long sys_sendmsg(int fd, stru
 	err = sock_sendmsg(sock, &msg_sys, total_len);
 
 out_freectl:
-	if (ctl_buf != ctl)    
+	if (ctl_buf != ctl)
 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
 out_freeiov:
 	if (iov != iovstack)
 		sock_kfree_s(sock->sk, iov, iov_size);
 out_put:
 	fput_light(sock->file, fput_needed);
-out:       
+out:
 	return err;
 }
 
@@ -1853,12 +1857,14 @@ out:       
  *	BSD recvmsg interface
  */
 
-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
+asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
+			    unsigned int flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov=iovstack;
+	struct iovec *iov = iovstack;
 	struct msghdr msg_sys;
 	unsigned long cmsg_ptr;
 	int err, iov_size, total_len, len;
@@ -1870,13 +1876,13 @@ asmlinkage long sys_recvmsg(int fd, stru
 	/* user mode address pointers */
 	struct sockaddr __user *uaddr;
 	int __user *uaddr_len;
-	
+
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else
-		if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
-			return -EFAULT;
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1885,8 +1891,8 @@ asmlinkage long sys_recvmsg(int fd, stru
 	err = -EMSGSIZE;
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
-	
-	/* Check whether to allocate the iovec area*/
+
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1896,11 +1902,11 @@ asmlinkage long sys_recvmsg(int fd, stru
 	}
 
 	/*
-	 *	Save the user-mode address (verify_iovec will change the
-	 *	kernel msghdr to use the kernel address space)
+	 *      Save the user-mode address (verify_iovec will change the
+	 *      kernel msghdr to use the kernel address space)
 	 */
-	 
-	uaddr = (void __user *) msg_sys.msg_name;
+
+	uaddr = (void __user *)msg_sys.msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
 	if (MSG_CMSG_COMPAT & flags) {
 		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
@@ -1908,13 +1914,13 @@ asmlinkage long sys_recvmsg(int fd, stru
 		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
-	total_len=err;
+	total_len = err;
 
 	cmsg_ptr = (unsigned long)msg_sys.msg_control;
 	msg_sys.msg_flags = 0;
 	if (MSG_CMSG_COMPAT & flags)
 		msg_sys.msg_flags = MSG_CMSG_COMPAT;
-	
+
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1923,7 +1929,8 @@ asmlinkage long sys_recvmsg(int fd, stru
 	len = err;
 
 	if (uaddr != NULL) {
-		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+					uaddr_len);
 		if (err < 0)
 			goto out_freeiov;
 	}
@@ -1932,10 +1939,10 @@ asmlinkage long sys_recvmsg(int fd, stru
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg_compat->msg_controllen);
 	else
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg->msg_controllen);
 	if (err)
 		goto out_freeiov;
@@ -1954,163 +1961,187 @@ out:
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+static const unsigned char nargs[18]={
+	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+};
+
 #undef AL
 
 /*
- *	System call vectors. 
+ *	System call vectors.
  *
  *	Argument checking cleaned up. Saved 20% in size.
  *  This function doesn't need to set the kernel lock because
- *  it is set by the callees. 
+ *  it is set by the callees.
  */
 
 asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 {
 	unsigned long a[6];
-	unsigned long a0,a1;
+	unsigned long a0, a1;
 	int err;
 
-	if(call<1||call>SYS_RECVMSG)
+	if (call < 1 || call > SYS_RECVMSG)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
 	if (copy_from_user(a, args, nargs[call]))
 		return -EFAULT;
 
-	err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
+	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
 	if (err)
 		return err;
 
-	a0=a[0];
-	a1=a[1];
-	
-	switch(call) 
-	{
-		case SYS_SOCKET:
-			err = sys_socket(a0,a1,a[2]);
-			break;
-		case SYS_BIND:
-			err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_CONNECT:
-			err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_LISTEN:
-			err = sys_listen(a0,a1);
-			break;
-		case SYS_ACCEPT:
-			err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETSOCKNAME:
-			err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETPEERNAME:
-			err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_SOCKETPAIR:
-			err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
-			break;
-		case SYS_SEND:
-			err = sys_send(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_SENDTO:
-			err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
-					 (struct sockaddr __user *)a[4], a[5]);
-			break;
-		case SYS_RECV:
-			err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_RECVFROM:
-			err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
-					   (struct sockaddr __user *)a[4], (int __user *)a[5]);
-			break;
-		case SYS_SHUTDOWN:
-			err = sys_shutdown(a0,a1);
-			break;
-		case SYS_SETSOCKOPT:
-			err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
-			break;
-		case SYS_GETSOCKOPT:
-			err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
-			break;
-		case SYS_SENDMSG:
-			err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		case SYS_RECVMSG:
-			err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		default:
-			err = -EINVAL;
-			break;
+	a0 = a[0];
+	a1 = a[1];
+
+	switch (call) {
+	case SYS_SOCKET:
+		err = sys_socket(a0, a1, a[2]);
+		break;
+	case SYS_BIND:
+		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_CONNECT:
+		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_LISTEN:
+		err = sys_listen(a0, a1);
+		break;
+	case SYS_ACCEPT:
+		err =
+		    sys_accept(a0, (struct sockaddr __user *)a1,
+			       (int __user *)a[2]);
+		break;
+	case SYS_GETSOCKNAME:
+		err =
+		    sys_getsockname(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_GETPEERNAME:
+		err =
+		    sys_getpeername(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_SOCKETPAIR:
+		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+		break;
+	case SYS_SEND:
+		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_SENDTO:
+		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+				 (struct sockaddr __user *)a[4], a[5]);
+		break;
+	case SYS_RECV:
+		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_RECVFROM:
+		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+				   (struct sockaddr __user *)a[4],
+				   (int __user *)a[5]);
+		break;
+	case SYS_SHUTDOWN:
+		err = sys_shutdown(a0, a1);
+		break;
+	case SYS_SETSOCKOPT:
+		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+		break;
+	case SYS_GETSOCKOPT:
+		err =
+		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+				   (int __user *)a[4]);
+		break;
+	case SYS_SENDMSG:
+		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	case SYS_RECVMSG:
+		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	default:
+		err = -EINVAL;
+		break;
 	}
 	return err;
 }
 
-#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
 
-/*
+/**
+ *	sock_register - add a socket protocol handler
+ *	@ops: description of protocol
+ *
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
- *	SOCKET module.
+ *	socket interface. The value ops->family coresponds to the
+ *	socket system call protocol family.
  */
-
-int sock_register(struct net_proto_family *ops)
+int sock_register(const struct net_proto_family *ops)
 {
 	int err;
 
 	if (ops->family >= NPROTO) {
-		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
+		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+		       NPROTO);
 		return -ENOBUFS;
 	}
-	net_family_write_lock();
-	err = -EEXIST;
-	if (net_families[ops->family] == NULL) {
-		net_families[ops->family]=ops;
+
+	spin_lock(&net_family_lock);
+	if (net_families[ops->family])
+		err = -EEXIST;
+	else {
+		net_families[ops->family] = ops;
 		err = 0;
 	}
-	net_family_write_unlock();
-	printk(KERN_INFO "NET: Registered protocol family %d\n",
-	       ops->family);
+	spin_unlock(&net_family_lock);
+
+	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
 
-/*
+/**
+ *	sock_unregister - remove a protocol handler
+ *	@family: protocol family to remove
+ *
  *	This function is called by a protocol handler that wants to
  *	remove its address family, and have it unlinked from the
- *	SOCKET module.
+ *	new socket creation.
+ *
+ *	If protocol handler is a module, then it can use module reference
+ *	counts to protect against new references. If protocol handler is not
+ *	a module then it needs to provide its own protection in
+ *	the ops->create routine.
  */
-
-int sock_unregister(int family)
+void sock_unregister(int family)
 {
-	if (family < 0 || family >= NPROTO)
-		return -1;
+	BUG_ON(family < 0 || family >= NPROTO);
 
-	net_family_write_lock();
-	net_families[family]=NULL;
-	net_family_write_unlock();
-	printk(KERN_INFO "NET: Unregistered protocol family %d\n",
-	       family);
-	return 0;
+	spin_lock(&net_family_lock);
+	net_families[family] = NULL;
+	spin_unlock(&net_family_lock);
+
+	synchronize_rcu();
+
+	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 }
 
 static int __init sock_init(void)
 {
 	/*
-	 *	Initialize sock SLAB cache.
+	 *      Initialize sock SLAB cache.
 	 */
-	 
+
 	sk_init();
 
 	/*
-	 *	Initialize skbuff SLAB cache 
+	 *      Initialize skbuff SLAB cache
 	 */
 	skb_init();
 
 	/*
-	 *	Initialize the protocols module. 
+	 *      Initialize the protocols module.
 	 */
 
 	init_inodecache();
@@ -2136,7 +2167,7 @@ void socket_seq_show(struct seq_file *se
 	int counter = 0;
 
 	for_each_possible_cpu(cpu)
-		counter += per_cpu(sockets_in_use, cpu);
+	    counter += per_cpu(sockets_in_use, cpu);
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)
@@ -2144,11 +2175,11 @@ void socket_seq_show(struct seq_file *se
 
 	seq_printf(seq, "sockets: used %d\n", counter);
 }
-#endif /* CONFIG_PROC_FS */
+#endif				/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file, unsigned cmd,
-				unsigned long arg)
+			      unsigned long arg)
 {
 	struct socket *sock = file->private_data;
 	int ret = -ENOIOCTLCMD;
@@ -2160,6 +2191,109 @@ static long compat_sock_ioctl(struct fil
 }
 #endif
 
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+	return sock->ops->bind(sock, addr, addrlen);
+}
+
+int kernel_listen(struct socket *sock, int backlog)
+{
+	return sock->ops->listen(sock, backlog);
+}
+
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+			       newsock);
+	if (err < 0)
+		goto done;
+
+	err = sock->ops->accept(sock, *newsock, flags);
+	if (err < 0) {
+		sock_release(*newsock);
+		goto done;
+	}
+
+	(*newsock)->ops = sock->ops;
+
+done:
+	return err;
+}
+
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+                   int flags)
+{
+	return sock->ops->connect(sock, addr, addrlen, flags);
+}
+
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 0);
+}
+
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 1);
+}
+
+int kernel_getsockopt(struct socket *sock, int level, int optname,
+			char *optval, int *optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_getsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->getsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_setsockopt(struct socket *sock, int level, int optname,
+			char *optval, int optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_setsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->setsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags)
+{
+	if (sock->ops->sendpage)
+		return sock->ops->sendpage(sock, page, offset, size, flags);
+
+	return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	err = sock->ops->ioctl(sock, cmd, arg);
+	set_fs(oldfs);
+
+	return err;
+}
+
 /* ABI emulation layers need these two */
 EXPORT_SYMBOL(move_addr_to_kernel);
 EXPORT_SYMBOL(move_addr_to_user);
@@ -2176,3 +2310,13 @@ EXPORT_SYMBOL(sock_wake_async);
 EXPORT_SYMBOL(sockfd_lookup);
 EXPORT_SYMBOL(kernel_sendmsg);
 EXPORT_SYMBOL(kernel_recvmsg);
+EXPORT_SYMBOL(kernel_bind);
+EXPORT_SYMBOL(kernel_listen);
+EXPORT_SYMBOL(kernel_accept);
+EXPORT_SYMBOL(kernel_connect);
+EXPORT_SYMBOL(kernel_getsockname);
+EXPORT_SYMBOL(kernel_getpeername);
+EXPORT_SYMBOL(kernel_getsockopt);
+EXPORT_SYMBOL(kernel_setsockopt);
+EXPORT_SYMBOL(kernel_sendpage);
+EXPORT_SYMBOL(kernel_sock_ioctl);
diff -puN net/sunrpc/socklib.c~git-net net/sunrpc/socklib.c
--- a/net/sunrpc/socklib.c~git-net
+++ a/net/sunrpc/socklib.c
@@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_
 		return -1;
 	if ((unsigned short)csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_HW))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:
diff -puN net/sunrpc/svcsock.c~git-net net/sunrpc/svcsock.c
--- a/net/sunrpc/svcsock.c~git-net
+++ a/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	/* send head */
 	if (slen == xdr->head[0].iov_len)
 		flags = 0;
-	len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+	len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
 	if (len != xdr->head[0].iov_len)
 		goto out;
 	slen -= xdr->head[0].iov_len;
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	while (pglen > 0) {
 		if (slen == size)
 			flags = 0;
-		result = sock->ops->sendpage(sock, *ppage, base, size, flags);
+		result = kernel_sendpage(sock, *ppage, base, size, flags);
 		if (result > 0)
 			len += result;
 		if (result != size)
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	}
 	/* send tail */
 	if (xdr->tail[0].iov_len) {
-		result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 
+		result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
 					     ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
 					     xdr->tail[0].iov_len, 0);
 
@@ -434,13 +434,10 @@ out:
 static int
 svc_recv_available(struct svc_sock *svsk)
 {
-	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
 
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
-	set_fs(oldfs);
+	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
 
 	return (err >= 0)? avail : err;
 }
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	 * at accept time. FIXME
 	 */
 	alen = sizeof(rqstp->rq_addr);
-	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+	kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
 
 	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
 		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk)
 	struct svc_serv	*serv = svsk->sk_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
-	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
 
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk)
 	if (!sock)
 		return;
 
-	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
-	if (err) {
+	clear_bit(SK_CONN, &svsk->sk_flags);
+	err = kernel_accept(sock, &newsock, O_NONBLOCK);
+	if (err < 0) {
 		if (err == -ENOMEM)
 			printk(KERN_WARNING "%s: no more sockets!\n",
 			       serv->sv_name);
-		return;
-	}
-
-	dprintk("svc: tcp_accept %p allocated\n", newsock);
-	newsock->ops = ops = sock->ops;
-
-	clear_bit(SK_CONN, &svsk->sk_flags);
-	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
-		if (err != -EAGAIN && net_ratelimit())
+		else if (err != -EAGAIN && net_ratelimit())
 			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
 				   serv->sv_name, -err);
-		goto failed;		/* aborted connection or whatever */
+		return;
 	}
+
 	set_bit(SK_CONN, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
 
 	slen = sizeof(sin);
-	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
+	err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
 	if (err < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv,
 	if (sin != NULL) {
 		if (type == SOCK_STREAM)
 			sock->sk->sk_reuse = 1; /* allow address reuse */
-		error = sock->ops->bind(sock, (struct sockaddr *) sin,
+		error = kernel_bind(sock, (struct sockaddr *) sin,
 						sizeof(*sin));
 		if (error < 0)
 			goto bummer;
 	}
 
 	if (protocol == IPPROTO_TCP) {
-		if ((error = sock->ops->listen(sock, 64)) < 0)
+		if ((error = kernel_listen(sock, 64)) < 0)
 			goto bummer;
 	}
 
diff -puN net/sunrpc/xprtsock.c~git-net net/sunrpc/xprtsock.c
--- a/net/sunrpc/xprtsock.c~git-net
+++ a/net/sunrpc/xprtsock.c
@@ -174,7 +174,6 @@ static inline int xs_sendpages(struct so
 	struct page **ppage = xdr->pages;
 	unsigned int len, pglen = xdr->page_len;
 	int err, ret = 0;
-	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
 
 	if (unlikely(!sock))
 		return -ENOTCONN;
@@ -207,7 +206,6 @@ static inline int xs_sendpages(struct so
 		base &= ~PAGE_CACHE_MASK;
 	}
 
-	sendpage = sock->ops->sendpage ? : sock_no_sendpage;
 	do {
 		int flags = XS_SENDMSG_FLAGS;
 
@@ -220,10 +218,7 @@ static inline int xs_sendpages(struct so
 		if (pglen != len || xdr->tail[0].iov_len != 0)
 			flags |= MSG_MORE;
 
-		/* Hmm... We might be dealing with highmem pages */
-		if (PageHighMem(*ppage))
-			sendpage = sock_no_sendpage;
-		err = sendpage(sock, *ppage, base, len, flags);
+		err = kernel_sendpage(sock, *ppage, base, len, flags);
 		if (ret == 0)
 			ret = err;
 		else if (err > 0)
@@ -986,7 +981,7 @@ static int xs_bindresvport(struct rpc_xp
 
 	do {
 		myaddr.sin_port = htons(port);
-		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
 		if (err == 0) {
 			xprt->port = port;
@@ -1081,7 +1076,7 @@ static void xs_tcp_reuse_connection(stru
 	 */
 	memset(&any, 0, sizeof(any));
 	any.sa_family = AF_UNSPEC;
-	result = sock->ops->connect(sock, &any, sizeof(any), 0);
+	result = kernel_connect(sock, &any, sizeof(any), 0);
 	if (result)
 		dprintk("RPC:      AF_UNSPEC connect return code %d\n",
 				result);
@@ -1151,7 +1146,7 @@ static void xs_tcp_connect_worker(void *
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
-	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
+	status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
 			sizeof(xprt->addr), O_NONBLOCK);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
diff -puN net/xfrm/xfrm_policy.c~git-net net/xfrm/xfrm_policy.c
--- a/net/xfrm/xfrm_policy.c~git-net
+++ a/net/xfrm/xfrm_policy.c
@@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
@@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct fl
 		match = xfrm_selector_match(sel, fl, family);
 
 		if (match) {
- 			if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
+ 			if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) {
 				xfrm_pol_hold(pol);
 				break;
 			}
@@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int
 	};
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
 {
 	struct xfrm_policy *pol;
 
@@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_polic
  		int err = 0;
 
 		if (match)
-		  err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+		  err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir));
 
  		if (match && !err)
 			xfrm_pol_hold(pol);
@@ -862,19 +862,19 @@ int xfrm_lookup(struct dst_entry **dst_p
 	u32 genid;
 	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-	u32 sk_sid = security_sk_sid(sk, fl, dir);
+
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 	}
 
@@ -1032,13 +1032,15 @@ int
 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl);
+	err = security_xfrm_decode_session(skb, &fl->secid);
 	xfrm_policy_put_afinfo(afinfo);
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
@@ -1058,14 +1060,11 @@ int __xfrm_policy_check(struct sock *sk,
 	struct xfrm_policy *pol;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
-	u32 sk_sid;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
 	nf_nat_decode_session(skb, &fl, family);
 
-	sk_sid = security_sk_sid(sk, &fl, fl_dir);
-
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
@@ -1079,10 +1078,10 @@ int __xfrm_policy_check(struct sock *sk,
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
+		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (!pol)
@@ -1134,12 +1133,13 @@ int __xfrm_route_forward(struct sk_buff 
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
+/* Optimize later using cookies and generation ids. */
+
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	/* If it is marked obsolete, which is how we even get here,
-	 * then we have purged it from the policy bundle list and we
-	 * did that for a good reason.
-	 */
+	if (!stale_bundle(dst))
+		return dst;
+
 	return NULL;
 }
 
@@ -1277,6 +1277,8 @@ int xfrm_bundle_ok(struct xfrm_dst *firs
 
 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
 			return 0;
+		if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm))
+			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
 
diff -puN net/xfrm/xfrm_state.c~git-net net/xfrm/xfrm_state.c
--- a/net/xfrm/xfrm_state.c~git-net
+++ a/net/xfrm/xfrm_state.c
@@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, x
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
 				if (!xfrm_selector_match(&x->sel, fl, family) ||
-				    !xfrm_sec_ctx_match(pol->security, x->security))
+				    !security_xfrm_state_pol_flow_match(x, pol, fl))
 					continue;
 				if (!best ||
 				    best->km.dying > x->km.dying ||
@@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, x
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
  				if (xfrm_selector_match(&x->sel, fl, family) &&
-				    xfrm_sec_ctx_match(pol->security, x->security))
+				    security_xfrm_state_pol_flow_match(x, pol, fl))
 					error = -ESRCH;
 			}
 		}
@@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, x
 		 * to current session. */
 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
 
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		if (error) {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			goto out;
+		}
+
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			list_add_tail(&x->bydst, xfrm_state_bydst+h);
@@ -1018,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, in
 	err = -EINVAL;
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
-		pol = km->compile_policy(sk->sk_family, optname, data,
+		pol = km->compile_policy(sk, optname, data,
 					 optlen, &err);
 		if (err >= 0)
 			break;
diff -puN net/xfrm/xfrm_user.c~git-net net/xfrm/xfrm_user.c
--- a/net/xfrm/xfrm_user.c~git-net
+++ a/net/xfrm/xfrm_user.c
@@ -909,27 +909,40 @@ rtattr_failure:
 	return -1;
 }
 
-static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
 {
-	if (xp->security) {
-		int ctx_size = sizeof(struct xfrm_sec_ctx) +
-				xp->security->ctx_len;
-		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
-		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
-
-		uctx->exttype = XFRMA_SEC_CTX;
-		uctx->len = ctx_size;
-		uctx->ctx_doi = xp->security->ctx_doi;
-		uctx->ctx_alg = xp->security->ctx_alg;
-		uctx->ctx_len = xp->security->ctx_len;
-		memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
-	}
-	return 0;
+	int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len;
+	struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+	struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = ctx_size;
+	uctx->ctx_doi = s->ctx_doi;
+	uctx->ctx_alg = s->ctx_alg;
+	uctx->ctx_len = s->ctx_len;
+	memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+ 	return 0;
 
  rtattr_failure:
 	return -1;
 }
 
+static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb)
+{
+	if (x->security) {
+		return copy_sec_ctx(x->security, skb);
+	}
+	return 0;
+}
+
+static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	if (xp->security) {
+		return copy_sec_ctx(xp->security, skb);
+	}
+	return 0;
+}
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -1708,7 +1721,7 @@ static int build_acquire(struct sk_buff 
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
-	if (copy_to_user_sec_ctx(xp, skb))
+	if (copy_to_user_state_sec_ctx(x, skb))
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1742,7 +1755,7 @@ static int xfrm_send_acquire(struct xfrm
 /* User gives us xfrm_user_policy_info followed by an array of 0
  * or more templates.
  */
-static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 					       u8 *data, int len, int *dir)
 {
 	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
@@ -1750,7 +1763,7 @@ static struct xfrm_policy *xfrm_compile_
 	struct xfrm_policy *xp;
 	int nr;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -1792,6 +1805,15 @@ static struct xfrm_policy *xfrm_compile_
 	copy_from_user_policy(xp, p);
 	copy_templates(xp, ut, nr);
 
+	if (!xp->security) {
+		int err = security_xfrm_sock_policy_alloc(xp, sk);
+		if (err) {
+			kfree(xp);
+			*dir = err;
+			return NULL;
+		}
+	}
+
 	*dir = p->dir;
 
 	return xp;
diff -puN security/dummy.c~git-net security/dummy.c
--- a/security/dummy.c~git-net
+++ a/security/dummy.c
@@ -709,10 +709,10 @@ static int dummy_socket_create (int fami
 	return 0;
 }
 
-static void dummy_socket_post_create (struct socket *sock, int family, int type,
-				      int protocol, int kern)
+static int dummy_socket_post_create (struct socket *sock, int family, int type,
+				     int protocol, int kern)
 {
-	return;
+	return 0;
 }
 
 static int dummy_socket_bind (struct socket *sock, struct sockaddr *address,
@@ -805,14 +805,38 @@ static inline void dummy_sk_free_securit
 {
 }
 
-static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid)
+{
+}
+
+static inline void dummy_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int dummy_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
 {
 	return 0;
 }
+
+static inline void dummy_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
+
+static inline void dummy_req_classify_flow(const struct request_sock *req,
+			struct flowi *fl)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk)
 {
 	return 0;
 }
@@ -831,7 +855,8 @@ static int dummy_xfrm_policy_delete_secu
 	return 0;
 }
 
-static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_state_alloc_security(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid)
 {
 	return 0;
 }
@@ -849,6 +874,23 @@ static int dummy_xfrm_policy_lookup(stru
 {
 	return 0;
 }
+
+static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x,
+				struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SECURITY_NETWORK_XFRM */
 static int dummy_register_security (const char *name, struct security_operations *ops)
 {
@@ -1060,7 +1102,12 @@ void security_fixup_ops (struct security
 	set_to_dummy_if_null(ops, socket_getpeersec_dgram);
 	set_to_dummy_if_null(ops, sk_alloc_security);
 	set_to_dummy_if_null(ops, sk_free_security);
-	set_to_dummy_if_null(ops, sk_getsid);
+	set_to_dummy_if_null(ops, sk_clone_security);
+	set_to_dummy_if_null(ops, sk_getsecid);
+	set_to_dummy_if_null(ops, sock_graft);
+	set_to_dummy_if_null(ops, inet_conn_request);
+	set_to_dummy_if_null(ops, inet_csk_clone);
+	set_to_dummy_if_null(ops, req_classify_flow);
  #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef  CONFIG_SECURITY_NETWORK_XFRM
 	set_to_dummy_if_null(ops, xfrm_policy_alloc_security);
@@ -1071,6 +1118,9 @@ void security_fixup_ops (struct security
 	set_to_dummy_if_null(ops, xfrm_state_free_security);
 	set_to_dummy_if_null(ops, xfrm_state_delete_security);
 	set_to_dummy_if_null(ops, xfrm_policy_lookup);
+	set_to_dummy_if_null(ops, xfrm_state_pol_flow_match);
+	set_to_dummy_if_null(ops, xfrm_flow_state_match);
+	set_to_dummy_if_null(ops, xfrm_decode_session);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 #ifdef CONFIG_KEYS
 	set_to_dummy_if_null(ops, key_alloc);
diff -puN security/selinux/hooks.c~git-net security/selinux/hooks.c
--- a/security/selinux/hooks.c~git-net
+++ a/security/selinux/hooks.c
@@ -12,6 +12,8 @@
  *  Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *  Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *                          <dgoeddel@trustedcs.com>
+ *  Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
+ *                     Paul Moore, <paul.moore@hp.com>
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -74,6 +76,7 @@
 #include "objsec.h"
 #include "netif.h"
 #include "xfrm.h"
+#include "selinux_netlabel.h"
 
 #define XATTR_SELINUX_SUFFIX "selinux"
 #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
@@ -269,15 +272,13 @@ static int sk_alloc_security(struct sock
 {
 	struct sk_security_struct *ssec;
 
-	if (family != PF_UNIX)
-		return 0;
-
 	ssec = kzalloc(sizeof(*ssec), priority);
 	if (!ssec)
 		return -ENOMEM;
 
 	ssec->sk = sk;
 	ssec->peer_sid = SECINITSID_UNLABELED;
+	ssec->sid = SECINITSID_UNLABELED;
 	sk->sk_security = ssec;
 
 	return 0;
@@ -287,9 +288,6 @@ static void sk_free_security(struct sock
 {
 	struct sk_security_struct *ssec = sk->sk_security;
 
-	if (sk->sk_family != PF_UNIX)
-		return;
-
 	sk->sk_security = NULL;
 	kfree(ssec);
 }
@@ -2400,6 +2398,7 @@ static int selinux_inode_listsecurity(st
 
 static int selinux_file_permission(struct file *file, int mask)
 {
+	int rc;
 	struct inode *inode = file->f_dentry->d_inode;
 
 	if (!mask) {
@@ -2411,8 +2410,12 @@ static int selinux_file_permission(struc
 	if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
 		mask |= MAY_APPEND;
 
-	return file_has_perm(current, file,
-			     file_mask_to_av(inode->i_mode, mask));
+	rc = file_has_perm(current, file,
+			   file_mask_to_av(inode->i_mode, mask));
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(inode, mask);
 }
 
 static int selinux_file_alloc_security(struct file *file)
@@ -3063,11 +3066,13 @@ out:
 	return err;
 }
 
-static void selinux_socket_post_create(struct socket *sock, int family,
-				       int type, int protocol, int kern)
+static int selinux_socket_post_create(struct socket *sock, int family,
+				      int type, int protocol, int kern)
 {
+	int err = 0;
 	struct inode_security_struct *isec;
 	struct task_security_struct *tsec;
+	struct sk_security_struct *sksec;
 	u32 newsid;
 
 	isec = SOCK_INODE(sock)->i_security;
@@ -3078,7 +3083,15 @@ static void selinux_socket_post_create(s
 	isec->sid = kern ? SECINITSID_KERNEL : newsid;
 	isec->initialized = 1;
 
-	return;
+	if (sock->sk) {
+		sksec = sock->sk->sk_security;
+		sksec->sid = isec->sid;
+		err = selinux_netlbl_socket_post_create(sock,
+							family,
+							isec->sid);
+	}
+
+	return err;
 }
 
 /* Range of port numbers used to automatically bind.
@@ -3259,7 +3272,13 @@ static int selinux_socket_accept(struct 
 static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
  				  int size)
 {
-	return socket_has_perm(current, sock, SOCKET__WRITE);
+	int rc;
+
+	rc = socket_has_perm(current, sock, SOCKET__WRITE);
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE);
 }
 
 static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -3327,8 +3346,9 @@ static int selinux_socket_unix_stream_co
 	/* server child socket */
 	ssec = newsk->sk_security;
 	ssec->peer_sid = isec->sid;
-	
-	return 0;
+	err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid);
+
+	return err;
 }
 
 static int selinux_socket_unix_may_send(struct socket *sock,
@@ -3354,11 +3374,29 @@ static int selinux_socket_unix_may_send(
 }
 
 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
-		struct avc_audit_data *ad, u32 sock_sid, u16 sock_class,
-		u16 family, char *addrp, int len)
+		struct avc_audit_data *ad, u16 family, char *addrp, int len)
 {
 	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0;
+	struct socket *sock;
+	u16 sock_class = 0;
+	u32 sock_sid = 0;
+
+ 	read_lock_bh(&sk->sk_callback_lock);
+ 	sock = sk->sk_socket;
+ 	if (sock) {
+ 		struct inode *inode;
+ 		inode = SOCK_INODE(sock);
+ 		if (inode) {
+ 			struct inode_security_struct *isec;
+ 			isec = inode->i_security;
+ 			sock_sid = isec->sid;
+ 			sock_class = isec->sclass;
+ 		}
+ 	}
+ 	read_unlock_bh(&sk->sk_callback_lock);
+ 	if (!sock_sid)
+  		goto out;
 
 	if (!skb->dev)
 		goto out;
@@ -3418,12 +3456,10 @@ out:
 static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	u16 family;
-	u16 sock_class = 0;
 	char *addrp;
 	int len, err = 0;
-	u32 sock_sid = 0;
-	struct socket *sock;
 	struct avc_audit_data ad;
+	struct sk_security_struct *sksec = sk->sk_security;
 
 	family = sk->sk_family;
 	if (family != PF_INET && family != PF_INET6)
@@ -3433,22 +3469,6 @@ static int selinux_socket_sock_rcv_skb(s
 	if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP))
 		family = PF_INET;
 
- 	read_lock_bh(&sk->sk_callback_lock);
- 	sock = sk->sk_socket;
- 	if (sock) {
- 		struct inode *inode;
- 		inode = SOCK_INODE(sock);
- 		if (inode) {
- 			struct inode_security_struct *isec;
- 			isec = inode->i_security;
- 			sock_sid = isec->sid;
- 			sock_class = isec->sclass;
- 		}
- 	}
- 	read_unlock_bh(&sk->sk_callback_lock);
- 	if (!sock_sid)
-  		goto out;
-
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]";
 	ad.u.net.family = family;
@@ -3458,16 +3478,19 @@ static int selinux_socket_sock_rcv_skb(s
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid,
-						  sock_class, family,
+		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family,
 						  addrp, len);
 	else
-		err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__RECV, &ad);
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_sock_rcv_skb(sock_sid, skb);
+	err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad);
+	if (err)
+		goto out;
+
+	err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
 out:	
 	return err;
 }
@@ -3490,8 +3513,9 @@ static int selinux_socket_getpeersec_str
 		peer_sid = ssec->peer_sid;
 	}
 	else if (isec->sclass == SECCLASS_TCP_SOCKET) {
-		peer_sid = selinux_socket_getpeer_stream(sock->sk);
-
+		peer_sid = selinux_netlbl_socket_getpeersec_stream(sock);
+		if (peer_sid == SECSID_NULL)
+			peer_sid = selinux_socket_getpeer_stream(sock->sk);
 		if (peer_sid == SECSID_NULL) {
 			err = -ENOPROTOOPT;
 			goto out;
@@ -3531,8 +3555,11 @@ static int selinux_socket_getpeersec_dgr
 
 	if (sock && (sock->sk->sk_family == PF_UNIX))
 		selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid);
-	else if (skb)
-		peer_secid = selinux_socket_getpeer_dgram(skb);
+	else if (skb) {
+		peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb);
+		if (peer_secid == SECSID_NULL)
+			peer_secid = selinux_socket_getpeer_dgram(skb);
+	}
 
 	if (peer_secid == SECSID_NULL)
 		err = -EINVAL;
@@ -3551,22 +3578,82 @@ static void selinux_sk_free_security(str
 	sk_free_security(sk);
 }
 
-static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir)
+static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
 {
-	struct inode_security_struct *isec;
-	u32 sock_sid = SECINITSID_ANY_SOCKET;
+	struct sk_security_struct *ssec = sk->sk_security;
+	struct sk_security_struct *newssec = newsk->sk_security;
 
+	newssec->sid = ssec->sid;
+	newssec->peer_sid = ssec->peer_sid;
+}
+
+static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
+{
 	if (!sk)
-		return selinux_no_sk_sid(fl);
+		*secid = SECINITSID_ANY_SOCKET;
+	else {
+		struct sk_security_struct *sksec = sk->sk_security;
+
+		*secid = sksec->sid;
+	}
+}
+
+static void selinux_sock_graft(struct sock* sk, struct socket *parent)
+{
+	struct inode_security_struct *isec = SOCK_INODE(parent)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	isec->sid = sksec->sid;
+
+	selinux_netlbl_sock_graft(sk, parent);
+}
 
-	read_lock_bh(&sk->sk_callback_lock);
-	isec = get_sock_isec(sk);
+static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+				     struct request_sock *req)
+{
+	struct sk_security_struct *sksec = sk->sk_security;
+	int err;
+	u32 newsid;
+	u32 peersid;
+
+	newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid);
+	if (newsid != SECSID_NULL) {
+		req->secid = newsid;
+		return 0;
+	}
+
+	err = selinux_xfrm_decode_session(skb, &peersid, 0);
+	BUG_ON(err);
+
+	if (peersid == SECSID_NULL) {
+		req->secid = sksec->sid;
+		return 0;
+	}
 
-	if (isec)
-		sock_sid = isec->sid;
+	err = security_sid_mls_copy(sksec->sid, peersid, &newsid);
+	if (err)
+		return err;
 
-	read_unlock_bh(&sk->sk_callback_lock);
-	return sock_sid;
+	req->secid = newsid;
+	return 0;
+}
+
+static void selinux_inet_csk_clone(struct sock *newsk,
+				   const struct request_sock *req)
+{
+	struct sk_security_struct *newsksec = newsk->sk_security;
+
+	newsksec->sid = req->secid;
+	/* NOTE: Ideally, we should also get the isec->sid for the
+	   new socket in sync, but we don't have the isec available yet.
+	   So we will wait until sock_graft to do it, by which
+	   time it will have been created and available. */
+}
+
+static void selinux_req_classify_flow(const struct request_sock *req,
+				      struct flowi *fl)
+{
+	fl->secid = req->secid;
 }
 
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
@@ -3608,12 +3695,24 @@ out:
 #ifdef CONFIG_NETFILTER
 
 static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev,
-					    struct inode_security_struct *isec,
 					    struct avc_audit_data *ad,
 					    u16 family, char *addrp, int len)
 {
-	int err;
+	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0;
+	struct socket *sock;
+	struct inode *inode;
+	struct inode_security_struct *isec;
+
+	sock = sk->sk_socket;
+	if (!sock)
+		goto out;
+
+	inode = SOCK_INODE(sock);
+	if (!inode)
+		goto out;
+
+	isec = inode->i_security;
 	
 	err = sel_netif_sids(dev, &if_sid, NULL);
 	if (err)
@@ -3678,26 +3777,16 @@ static unsigned int selinux_ip_postroute
 	char *addrp;
 	int len, err = 0;
 	struct sock *sk;
-	struct socket *sock;
-	struct inode *inode;
 	struct sk_buff *skb = *pskb;
-	struct inode_security_struct *isec;
 	struct avc_audit_data ad;
 	struct net_device *dev = (struct net_device *)out;
+	struct sk_security_struct *sksec;
 
 	sk = skb->sk;
 	if (!sk)
 		goto out;
 
-	sock = sk->sk_socket;
-	if (!sock)
-		goto out;
-
-	inode = SOCK_INODE(sock);
-	if (!inode)
-		goto out;
-
-	isec = inode->i_security;
+	sksec = sk->sk_security;
 
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = dev->name;
@@ -3708,16 +3797,16 @@ static unsigned int selinux_ip_postroute
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad,
+		err = selinux_ip_postroute_last_compat(sk, dev, &ad,
 						       family, addrp, len);
 	else
-		err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__SEND, &ad);
 
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_postroute_last(isec->sid, skb);
+	err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad);
 out:
 	return err ? NF_DROP : NF_ACCEPT;
 }
@@ -4618,7 +4707,12 @@ static struct security_operations selinu
 	.socket_getpeersec_dgram =	selinux_socket_getpeersec_dgram,
 	.sk_alloc_security =		selinux_sk_alloc_security,
 	.sk_free_security =		selinux_sk_free_security,
-	.sk_getsid = 			selinux_sk_getsid_security,
+	.sk_clone_security =		selinux_sk_clone_security,
+	.sk_getsecid = 			selinux_sk_getsecid,
+	.sock_graft =			selinux_sock_graft,
+	.inet_conn_request =		selinux_inet_conn_request,
+	.inet_csk_clone =		selinux_inet_csk_clone,
+	.req_classify_flow =		selinux_req_classify_flow,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
@@ -4629,6 +4723,9 @@ static struct security_operations selinu
 	.xfrm_state_free_security =	selinux_xfrm_state_free,
 	.xfrm_state_delete_security =	selinux_xfrm_state_delete,
 	.xfrm_policy_lookup = 		selinux_xfrm_policy_lookup,
+	.xfrm_state_pol_flow_match =	selinux_xfrm_state_pol_flow_match,
+	.xfrm_flow_state_match =	selinux_xfrm_flow_state_match,
+	.xfrm_decode_session =		selinux_xfrm_decode_session,
 #endif
 
 #ifdef CONFIG_KEYS
diff -puN security/selinux/include/av_perm_to_string.h~git-net security/selinux/include/av_perm_to_string.h
--- a/security/selinux/include/av_perm_to_string.h~git-net
+++ a/security/selinux/include/av_perm_to_string.h
@@ -241,6 +241,7 @@
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext")
+   S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch")
    S_(SECCLASS_PACKET, PACKET__SEND, "send")
    S_(SECCLASS_PACKET, PACKET__RECV, "recv")
    S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto")
diff -puN security/selinux/include/av_permissions.h~git-net security/selinux/include/av_permissions.h
--- a/security/selinux/include/av_permissions.h~git-net
+++ a/security/selinux/include/av_permissions.h
@@ -911,6 +911,7 @@
 #define ASSOCIATION__SENDTO                       0x00000001UL
 #define ASSOCIATION__RECVFROM                     0x00000002UL
 #define ASSOCIATION__SETCONTEXT                   0x00000004UL
+#define ASSOCIATION__POLMATCH                     0x00000008UL
 
 #define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL      0x00000001UL
 #define NETLINK_KOBJECT_UEVENT_SOCKET__READ       0x00000002UL
diff -puN security/selinux/include/objsec.h~git-net security/selinux/include/objsec.h
--- a/security/selinux/include/objsec.h~git-net
+++ a/security/selinux/include/objsec.h
@@ -99,7 +99,16 @@ struct netif_security_struct {
 
 struct sk_security_struct {
 	struct sock *sk;		/* back pointer to sk object */
+	u32 sid;			/* SID of this object */
 	u32 peer_sid;			/* SID of peer */
+#ifdef CONFIG_NETLABEL
+	u16 sclass;			/* sock security class */
+	enum {				/* NetLabel state */
+		NLBL_UNSET = 0,
+		NLBL_REQUIRE,
+		NLBL_LABELED,
+	} nlbl_state;
+#endif
 };
 
 struct key_security_struct {
diff -puN security/selinux/include/security.h~git-net security/selinux/include/security.h
--- a/security/selinux/include/security.h~git-net
+++ a/security/selinux/include/security.h
@@ -78,6 +78,8 @@ int security_node_sid(u16 domain, void *
 int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
                                  u16 tclass);
 
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
+
 #define SECURITY_FS_USE_XATTR		1 /* use xattr */
 #define SECURITY_FS_USE_TRANS		2 /* use transition SIDs, e.g. devpts/tmpfs */
 #define SECURITY_FS_USE_TASK		3 /* use task SIDs, e.g. pipefs/sockfs */
diff -puN /dev/null security/selinux/include/selinux_netlabel.h
--- /dev/null
+++ a/security/selinux/include/selinux_netlabel.h
@@ -0,0 +1,125 @@
+/*
+ * SELinux interface to the NetLabel subsystem
+ *
+ * Author : Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _SELINUX_NETLABEL_H_
+#define _SELINUX_NETLABEL_H_
+
+#ifdef CONFIG_NETLABEL
+void selinux_netlbl_cache_invalidate(void);
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid);
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid);
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad);
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock);
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb);
+
+int __selinux_netlbl_inode_permission(struct inode *inode, int mask);
+/**
+ * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled
+ * @inode: the file descriptor's inode
+ * @mask: the permission mask
+ *
+ * Description:
+ * Looks at a file's inode and if it is marked as a socket protected by
+ * NetLabel then verify that the socket has been labeled, if not try to label
+ * the socket now with the inode's SID.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+static inline int selinux_netlbl_inode_permission(struct inode *inode,
+						  int mask)
+{
+	int rc = 0;
+	struct inode_security_struct *isec;
+	struct sk_security_struct *sksec;
+
+	if (!S_ISSOCK(inode->i_mode))
+		return 0;
+
+	isec = inode->i_security;
+	sksec = SOCKET_I(inode)->sk->sk_security;
+	down(&isec->sem);
+	if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
+		     (mask & (MAY_WRITE | MAY_APPEND))))
+		rc = __selinux_netlbl_inode_permission(inode, mask);
+	up(&isec->sem);
+
+	return rc;
+}
+#else
+static inline void selinux_netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int selinux_netlbl_socket_post_create(struct socket *sock,
+						    int sock_family,
+						    u32 sid)
+{
+	return 0;
+}
+
+static inline void selinux_netlbl_sock_graft(struct sock *sk,
+					     struct socket *sock)
+{
+	return;
+}
+
+static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb,
+						   u32 sock_sid)
+{
+	return SECSID_NULL;
+}
+
+static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+					      struct sk_buff *skb,
+					      struct avc_audit_data *ad)
+{
+	return 0;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	return SECSID_NULL;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	return SECSID_NULL;
+}
+
+static inline int selinux_netlbl_inode_permission(struct inode *inode,
+						  int mask)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif
diff -puN security/selinux/include/xfrm.h~git-net security/selinux/include/xfrm.h
--- a/security/selinux/include/xfrm.h~git-net
+++ a/security/selinux/include/xfrm.h
@@ -2,18 +2,25 @@
  * SELinux support for the XFRM LSM hooks
  *
  * Author : Trent Jaeger, <jaegert@us.ibm.com>
+ * Updated : Venkat Yekkirala, <vyekkirala@TrustedCS.com>
  */
 #ifndef _SELINUX_XFRM_H_
 #define _SELINUX_XFRM_H_
 
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new);
 void selinux_xfrm_policy_free(struct xfrm_policy *xp);
 int selinux_xfrm_policy_delete(struct xfrm_policy *xp);
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm);
+
 
 /*
  * Extract the security blob from the sock (it's actually on the socket)
@@ -26,30 +33,23 @@ static inline struct inode_security_stru
 	return SOCK_INODE(sk->sk_socket)->i_security;
 }
 
-
-static inline u32 selinux_no_sk_sid(struct flowi *fl)
-{
-	/* NOTE: no sock occurs on ICMP reply, forwards, ... */
-	/* icmp_reply: authorize as kernel packet */
-	if (fl && fl->proto == IPPROTO_ICMP) {
-		return SECINITSID_KERNEL;
-	}
-
-	return SECINITSID_ANY_SOCKET;
-}
-
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb);
+int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
 u32 selinux_socket_getpeer_stream(struct sock *sk);
 u32 selinux_socket_getpeer_dgram(struct sk_buff *skb);
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 #else
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
 
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
@@ -63,6 +63,11 @@ static inline int selinux_socket_getpeer
 {
 	return SECSID_NULL;
 }
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+{
+	*sid = SECSID_NULL;
+	return 0;
+}
 #endif
 
 #endif /* _SELINUX_XFRM_H_ */
diff -puN security/selinux/ss/ebitmap.c~git-net security/selinux/ss/ebitmap.c
--- a/security/selinux/ss/ebitmap.c~git-net
+++ a/security/selinux/ss/ebitmap.c
@@ -3,6 +3,14 @@
  *
  * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added ebitmap_export() and ebitmap_import()
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
@@ -59,6 +67,142 @@ int ebitmap_cpy(struct ebitmap *dst, str
 	return 0;
 }
 
+/**
+ * ebitmap_export - Export an ebitmap to a unsigned char bitmap string
+ * @src: the ebitmap to export
+ * @dst: the resulting bitmap string
+ * @dst_len: length of dst in bytes
+ *
+ * Description:
+ * Allocate a buffer at least src->highbit bits long and export the extensible
+ * bitmap into the buffer.  The bitmap string will be in little endian format,
+ * i.e. LSB first.  The value returned in dst_len may not the true size of the
+ * buffer as the length of the buffer is rounded up to a multiple of MAPTYPE.
+ * The caller must free the buffer when finished. Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len)
+{
+	size_t bitmap_len;
+	unsigned char *bitmap;
+	struct ebitmap_node *iter_node;
+	MAPTYPE node_val;
+	size_t bitmap_byte;
+	unsigned char bitmask;
+
+	bitmap_len = src->highbit / 8;
+	if (src->highbit % 7)
+		bitmap_len += 1;
+	if (bitmap_len == 0)
+		return -EINVAL;
+
+	bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) +
+			 sizeof(MAPTYPE),
+			 GFP_ATOMIC);
+	if (bitmap == NULL)
+		return -ENOMEM;
+
+	iter_node = src->node;
+	do {
+		bitmap_byte = iter_node->startbit / 8;
+		bitmask = 0x80;
+		node_val = iter_node->map;
+		do {
+			if (bitmask == 0) {
+				bitmap_byte++;
+				bitmask = 0x80;
+			}
+			if (node_val & (MAPTYPE)0x01)
+				bitmap[bitmap_byte] |= bitmask;
+			node_val >>= 1;
+			bitmask >>= 1;
+		} while (node_val > 0);
+		iter_node = iter_node->next;
+	} while (iter_node);
+
+	*dst = bitmap;
+	*dst_len = bitmap_len;
+	return 0;
+}
+
+/**
+ * ebitmap_import - Import an unsigned char bitmap string into an ebitmap
+ * @src: the bitmap string
+ * @src_len: the bitmap length in bytes
+ * @dst: the empty ebitmap
+ *
+ * Description:
+ * This function takes a little endian bitmap string in src and imports it into
+ * the ebitmap pointed to by dst.  Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst)
+{
+	size_t src_off = 0;
+	struct ebitmap_node *node_new;
+	struct ebitmap_node *node_last = NULL;
+	size_t iter;
+	size_t iter_bit;
+	size_t iter_limit;
+	unsigned char src_byte;
+
+	do {
+		iter_limit = src_len - src_off;
+		if (iter_limit >= sizeof(MAPTYPE)) {
+			if (*(MAPTYPE *)&src[src_off] == 0) {
+				src_off += sizeof(MAPTYPE);
+				continue;
+			}
+			iter_limit = sizeof(MAPTYPE);
+		} else {
+			iter = src_off;
+			src_byte = 0;
+			do {
+				src_byte |= src[iter++];
+			} while (iter < src_len && src_byte == 0);
+			if (src_byte == 0)
+				break;
+		}
+
+		node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC);
+		if (unlikely(node_new == NULL)) {
+			ebitmap_destroy(dst);
+			return -ENOMEM;
+		}
+		node_new->startbit = src_off * 8;
+		iter = 0;
+		do {
+			src_byte = src[src_off++];
+			iter_bit = iter++ * 8;
+			while (src_byte != 0) {
+				if (src_byte & 0x80)
+					node_new->map |= MAPBIT << iter_bit;
+				iter_bit++;
+				src_byte <<= 1;
+			}
+		} while (iter < iter_limit);
+
+		if (node_last != NULL)
+			node_last->next = node_new;
+		else
+			dst->node = node_new;
+		node_last = node_new;
+	} while (src_off < src_len);
+
+	if (likely(node_last != NULL))
+		dst->highbit = node_last->startbit + MAPSIZE;
+	else
+		ebitmap_init(dst);
+
+	return 0;
+}
+
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
 {
 	struct ebitmap_node *n1, *n2;
diff -puN security/selinux/ss/ebitmap.h~git-net security/selinux/ss/ebitmap.h
--- a/security/selinux/ss/ebitmap.h~git-net
+++ a/security/selinux/ss/ebitmap.h
@@ -69,6 +69,12 @@ static inline int ebitmap_node_get_bit(s
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len);
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst);
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
 int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
diff -puN security/selinux/ss/mls.c~git-net security/selinux/ss/mls.c
--- a/security/selinux/ss/mls.c~git-net
+++ a/security/selinux/ss/mls.c
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -212,26 +219,6 @@ int mls_context_isvalid(struct policydb 
 }
 
 /*
- * Copies the MLS range from `src' into `dst'.
- */
-static inline int mls_copy_context(struct context *dst,
-				   struct context *src)
-{
-	int l, rc = 0;
-
-	/* Copy the MLS range from the source context */
-	for (l = 0; l < 2; l++) {
-		dst->range.level[l].sens = src->range.level[l].sens;
-		rc = ebitmap_cpy(&dst->range.level[l].cat,
-				 &src->range.level[l].cat);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
-/*
  * Set the MLS fields in the security context structure
  * `context' based on the string representation in
  * the string `*scontext'.  Update `*scontext' to
@@ -585,3 +572,152 @@ int mls_compute_sid(struct context *scon
 	return -EINVAL;
 }
 
+/**
+ * mls_export_lvl - Export the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context copy the low MLS sensitivity level into lvl_low
+ * and the high sensitivity level in lvl_high.  The MLS levels are only
+ * exported if the pointers are not NULL, if they are NULL then that level is
+ * not exported.
+ *
+ */
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	if (low != NULL)
+		*low = context->range.level[0].sens - 1;
+	if (high != NULL)
+		*high = context->range.level[1].sens - 1;
+}
+
+/**
+ * mls_import_lvl - Import the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context and the two sensitivty levels, set the MLS levels
+ * in the context according the two given as parameters.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+void mls_import_lvl(struct context *context, u32 low, u32 high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	context->range.level[0].sens = low + 1;
+	context->range.level[1].sens = high + 1;
+}
+
+/**
+ * mls_export_cat - Export the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the cat_low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the cat_high bitmap in bytes
+ *
+ * Description:
+ * Given the security context export the low MLS category bitmap into cat_low
+ * and the high category bitmap into cat_high.  The MLS categories are only
+ * exported if the pointers are not NULL, if they are NULL then that level is
+ * not exported.  The caller is responsibile for freeing the memory when
+ * finished.  Returns zero on success, negative values on failure.
+ *
+ */
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_export(&context->range.level[0].cat,
+				    low,
+				    low_len);
+		if (rc != 0)
+			goto export_cat_failure;
+	}
+	if (high != NULL) {
+		rc = ebitmap_export(&context->range.level[1].cat,
+				    high,
+				    high_len);
+		if (rc != 0)
+			goto export_cat_failure;
+	}
+
+	return 0;
+
+export_cat_failure:
+	if (low != NULL)
+		kfree(*low);
+	if (high != NULL)
+		kfree(*high);
+	return rc;
+}
+
+/**
+ * mls_import_cat - Import the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the cat_low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the cat_high bitmap in bytes
+ *
+ * Description:
+ * Given the security context and the two category bitmap strings import the
+ * categories into the security context.  The MLS categories are only imported
+ * if the pointers are not NULL, if they are NULL they are skipped.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_import(low,
+				    low_len,
+				    &context->range.level[0].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+	if (high != NULL) {
+		if (high == low)
+			rc = ebitmap_cpy(&context->range.level[1].cat,
+					 &context->range.level[0].cat);
+		else
+			rc = ebitmap_import(high,
+					    high_len,
+					    &context->range.level[1].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+
+	return 0;
+
+import_cat_failure:
+	ebitmap_destroy(&context->range.level[0].cat);
+	ebitmap_destroy(&context->range.level[1].cat);
+	return rc;
+}
diff -puN security/selinux/ss/mls.h~git-net security/selinux/ss/mls.h
--- a/security/selinux/ss/mls.h~git-net
+++ a/security/selinux/ss/mls.h
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #ifndef _SS_MLS_H_
 #define _SS_MLS_H_
@@ -17,6 +24,26 @@
 #include "context.h"
 #include "policydb.h"
 
+/*
+ * Copies the MLS range from `src' into `dst'.
+ */
+static inline int mls_copy_context(struct context *dst,
+				   struct context *src)
+{
+	int l, rc = 0;
+
+	/* Copy the MLS range from the source context */
+	for (l = 0; l < 2; l++) {
+		dst->range.level[l].sens = src->range.level[l].sens;
+		rc = ebitmap_cpy(&dst->range.level[l].cat,
+				 &src->range.level[l].cat);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 int mls_compute_context_len(struct context *context);
 void mls_sid_to_context(struct context *context, char **scontext);
 int mls_context_isvalid(struct policydb *p, struct context *c);
@@ -42,5 +69,19 @@ int mls_compute_sid(struct context *scon
 int mls_setup_user_range(struct context *fromcon, struct user_datum *user,
                          struct context *usercon);
 
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high);
+void mls_import_lvl(struct context *context, u32 low, u32 high);
+
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len);
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len);
+
 #endif	/* _SS_MLS_H */
 
diff -puN security/selinux/ss/services.c~git-net security/selinux/ss/services.c
--- a/security/selinux/ss/services.c~git-net
+++ a/security/selinux/ss/services.c
@@ -13,6 +13,11 @@
  *
  * 	Added conditional policy language extensions
  *
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support for NetLabel
+ *
+ * Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004 Tresys Technology, LLC
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
@@ -29,6 +34,8 @@
 #include <linux/sched.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <net/sock.h>
+#include <net/netlabel.h>
 
 #include "flask.h"
 #include "avc.h"
@@ -40,6 +47,8 @@
 #include "services.h"
 #include "conditional.h"
 #include "mls.h"
+#include "objsec.h"
+#include "selinux_netlabel.h"
 
 extern void selnl_notify_policyload(u32 seqno);
 unsigned int policydb_loaded_version;
@@ -1241,6 +1250,7 @@ int security_load_policy(void *data, siz
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_netlbl_cache_invalidate();
 		return 0;
 	}
 
@@ -1295,6 +1305,7 @@ int security_load_policy(void *data, siz
 
 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_netlbl_cache_invalidate();
 
 	return 0;
 
@@ -1817,6 +1828,75 @@ out:
 	return rc;
 }
 
+/*
+ * security_sid_mls_copy() - computes a new sid based on the given
+ * sid and the mls portion of mls_sid.
+ */
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid)
+{
+	struct context *context1;
+	struct context *context2;
+	struct context newcon;
+	char *s;
+	u32 len;
+	int rc = 0;
+
+	if (!ss_initialized) {
+		*new_sid = sid;
+		goto out;
+	}
+
+	context_init(&newcon);
+
+	POLICY_RDLOCK;
+	context1 = sidtab_search(&sidtab, sid);
+	if (!context1) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	context2 = sidtab_search(&sidtab, mls_sid);
+	if (!context2) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", mls_sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	newcon.user = context1->user;
+	newcon.role = context1->role;
+	newcon.type = context1->type;
+	rc = mls_copy_context(&newcon, context2);
+	if (rc)
+		goto out_unlock;
+
+
+	/* Check the validity of the new context. */
+	if (!policydb_context_isvalid(&policydb, &newcon)) {
+		rc = convert_context_handle_invalid_context(&newcon);
+		if (rc)
+			goto bad;
+	}
+
+	rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid);
+	goto out_unlock;
+
+bad:
+	if (!context_struct_to_string(&newcon, &s, &len)) {
+		audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+			  "security_sid_mls_copy: invalid context %s", s);
+		kfree(s);
+	}
+
+out_unlock:
+	POLICY_RDUNLOCK;
+	context_destroy(&newcon);
+out:
+	return rc;
+}
+
 struct selinux_audit_rule {
 	u32 au_seqno;
 	struct context au_ctxt;
@@ -2064,3 +2144,480 @@ void selinux_audit_set_callback(int (*ca
 {
 	aurule_callback = callback;
 }
+
+#ifdef CONFIG_NETLABEL
+/*
+ * This is the structure we store inside the NetLabel cache block.
+ */
+#define NETLBL_CACHE(x)           ((struct netlbl_cache *)(x))
+#define NETLBL_CACHE_T_NONE       0
+#define NETLBL_CACHE_T_SID        1
+#define NETLBL_CACHE_T_MLS        2
+struct netlbl_cache {
+	u32 type;
+	union {
+		u32 sid;
+		struct mls_range mls_label;
+	} data;
+};
+
+/**
+ * selinux_netlbl_cache_free - Free the NetLabel cached data
+ * @data: the data to free
+ *
+ * Description:
+ * This function is intended to be used as the free() callback inside the
+ * netlbl_lsm_cache structure.
+ *
+ */
+static void selinux_netlbl_cache_free(const void *data)
+{
+	struct netlbl_cache *cache = NETLBL_CACHE(data);
+	switch (cache->type) {
+	case NETLBL_CACHE_T_MLS:
+		ebitmap_destroy(&cache->data.mls_label.level[0].cat);
+		break;
+	}
+	kfree(data);
+}
+
+/**
+ * selinux_netlbl_cache_add - Add an entry to the NetLabel cache
+ * @skb: the packet
+ * @ctx: the SELinux context
+ *
+ * Description:
+ * Attempt to cache the context in @ctx, which was derived from the packet in
+ * @skb, in the NetLabel subsystem cache.
+ *
+ */
+static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx)
+{
+	struct netlbl_cache *cache = NULL;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+
+	cache = kzalloc(sizeof(*cache),	GFP_ATOMIC);
+	if (cache == NULL)
+		goto netlbl_cache_add_failure;
+	secattr.cache.free = selinux_netlbl_cache_free;
+	secattr.cache.data = (void *)cache;
+
+	cache->type = NETLBL_CACHE_T_MLS;
+	if (ebitmap_cpy(&cache->data.mls_label.level[0].cat,
+			&ctx->range.level[0].cat) != 0)
+		goto netlbl_cache_add_failure;
+	cache->data.mls_label.level[1].cat.highbit =
+		cache->data.mls_label.level[0].cat.highbit;
+	cache->data.mls_label.level[1].cat.node =
+		cache->data.mls_label.level[0].cat.node;
+	cache->data.mls_label.level[0].sens = ctx->range.level[0].sens;
+	cache->data.mls_label.level[1].sens = ctx->range.level[0].sens;
+
+	if (netlbl_cache_add(skb, &secattr) != 0)
+		goto netlbl_cache_add_failure;
+
+	return;
+
+netlbl_cache_add_failure:
+	netlbl_secattr_destroy(&secattr, 1);
+}
+
+/**
+ * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache
+ *
+ * Description:
+ * Invalidate the NetLabel security attribute mapping cache.
+ *
+ */
+void selinux_netlbl_cache_invalidate(void)
+{
+	netlbl_cache_invalidate();
+}
+
+/**
+ * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID
+ * @skb: the network packet
+ * @secattr: the NetLabel packet security attributes
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SELinux SID
+ *
+ * Description:
+ * Convert the given NetLabel packet security attributes in @secattr into a
+ * SELinux SID.  If the @secattr field does not contain a full SELinux
+ * SID/context then use the context in @base_sid as the foundation.  If @skb
+ * is not NULL attempt to cache as much data as possibile.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb,
+					 struct netlbl_lsm_secattr *secattr,
+					 u32 base_sid,
+					 u32 *sid)
+{
+	int rc = -EIDRM;
+	struct context *ctx;
+	struct context ctx_new;
+	struct netlbl_cache *cache;
+
+	POLICY_RDLOCK;
+
+	if (secattr->cache.data) {
+		cache = NETLBL_CACHE(secattr->cache.data);
+		switch (cache->type) {
+		case NETLBL_CACHE_T_SID:
+			*sid = cache->data.sid;
+			rc = 0;
+			break;
+		case NETLBL_CACHE_T_MLS:
+			ctx = sidtab_search(&sidtab, base_sid);
+			if (ctx == NULL)
+				goto netlbl_secattr_to_sid_return;
+
+			ctx_new.user = ctx->user;
+			ctx_new.role = ctx->role;
+			ctx_new.type = ctx->type;
+			ctx_new.range.level[0].sens =
+				cache->data.mls_label.level[0].sens;
+			ctx_new.range.level[0].cat.highbit =
+				cache->data.mls_label.level[0].cat.highbit;
+			ctx_new.range.level[0].cat.node =
+				cache->data.mls_label.level[0].cat.node;
+			ctx_new.range.level[1].sens =
+				cache->data.mls_label.level[1].sens;
+			ctx_new.range.level[1].cat.highbit =
+				cache->data.mls_label.level[1].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				cache->data.mls_label.level[1].cat.node;
+
+			rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+			break;
+		default:
+			goto netlbl_secattr_to_sid_return;
+		}
+	} else if (secattr->mls_lvl_vld) {
+		ctx = sidtab_search(&sidtab, base_sid);
+		if (ctx == NULL)
+			goto netlbl_secattr_to_sid_return;
+
+		ctx_new.user = ctx->user;
+		ctx_new.role = ctx->role;
+		ctx_new.type = ctx->type;
+		mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl);
+		if (secattr->mls_cat) {
+			if (mls_import_cat(&ctx_new,
+					   secattr->mls_cat,
+					   secattr->mls_cat_len,
+					   NULL,
+					   0) != 0)
+				goto netlbl_secattr_to_sid_return;
+			ctx_new.range.level[1].cat.highbit =
+				ctx_new.range.level[0].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				ctx_new.range.level[0].cat.node;
+		} else {
+			ebitmap_init(&ctx_new.range.level[0].cat);
+			ebitmap_init(&ctx_new.range.level[1].cat);
+		}
+		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+		if (rc != 0)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		if (skb != NULL)
+			selinux_netlbl_cache_add(skb, &ctx_new);
+		ebitmap_destroy(&ctx_new.range.level[0].cat);
+	} else {
+		*sid = SECINITSID_UNLABELED;
+		rc = 0;
+	}
+
+netlbl_secattr_to_sid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+netlbl_secattr_to_sid_return_cleanup:
+	ebitmap_destroy(&ctx_new.range.level[0].cat);
+	goto netlbl_secattr_to_sid_return;
+}
+
+/**
+ * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel
+ * @skb: the packet
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SID
+ *
+ * Description:
+ * Call the NetLabel mechanism to get the security attributes of the given
+ * packet and use those attributes to determine the correct context/SID to
+ * assign to the packet.  Returns zero on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
+					u32 base_sid,
+					u32 *sid)
+{
+	int rc;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+	rc = netlbl_skbuff_getattr(skb, &secattr);
+	if (rc == 0)
+		rc = selinux_netlbl_secattr_to_sid(skb,
+						   &secattr,
+						   base_sid,
+						   sid);
+	netlbl_secattr_destroy(&secattr, 0);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism
+ * @sock: the socket to label
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid)
+{
+	int rc = -ENOENT;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+	struct context *ctx;
+
+	if (!ss_initialized)
+		return 0;
+
+	POLICY_RDLOCK;
+
+	ctx = sidtab_search(&sidtab, sid);
+	if (ctx == NULL)
+		goto netlbl_socket_setsid_return;
+
+	netlbl_secattr_init(&secattr);
+	secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1],
+				 GFP_ATOMIC);
+	mls_export_lvl(ctx, &secattr.mls_lvl, NULL);
+	secattr.mls_lvl_vld = 1;
+	mls_export_cat(ctx,
+		       &secattr.mls_cat,
+		       &secattr.mls_cat_len,
+		       NULL,
+		       NULL);
+
+	rc = netlbl_socket_setattr(sock, &secattr);
+	if (rc == 0)
+		sksec->nlbl_state = NLBL_LABELED;
+
+	netlbl_secattr_destroy(&secattr, 0);
+
+netlbl_socket_setsid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_post_create - Label a socket using NetLabel
+ * @sock: the socket to label
+ * @sock_family: the socket family
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	if (sock_family != PF_INET)
+		return 0;
+
+	sksec->sclass = isec->sclass;
+	sksec->nlbl_state = NLBL_REQUIRE;
+	return selinux_netlbl_socket_setsid(sock, sid);
+}
+
+/**
+ * selinux_netlbl_sock_graft - Netlabel the new socket
+ * @sk: the new connection
+ * @sock: the new socket
+ *
+ * Description:
+ * The connection represented by @sk is being grafted onto @sock so set the
+ * socket's NetLabel to match the SID of @sk.
+ *
+ */
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	if (sk->sk_family != PF_INET)
+		return;
+
+	sksec->nlbl_state = NLBL_REQUIRE;
+	sksec->peer_sid = sksec->sid;
+	sksec->sclass = isec->sclass;
+
+	/* Try to set the NetLabel on the socket to save time later, if we fail
+	 * here we will pick up the pieces in later calls to
+	 * selinux_netlbl_inode_permission(). */
+	selinux_netlbl_socket_setsid(sock, sksec->sid);
+}
+
+/**
+ * selinux_netlbl_inet_conn_request - Handle a new connection request
+ * @skb: the packet
+ * @sock_sid: the SID of the parent socket
+ *
+ * Description:
+ * If present, use the security attributes of the packet in @skb and the
+ * parent sock's SID to arrive at a SID for the new child sock.  Returns the
+ * SID of the connection or SECSID_NULL on failure.
+ *
+ */
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid)
+{
+	int rc;
+	u32 peer_sid;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid);
+	if (rc != 0)
+		return SECSID_NULL;
+
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+
+/**
+ * __selinux_netlbl_inode_permission - Label a socket using NetLabel
+ * @inode: the file descriptor's inode
+ * @mask: the permission mask
+ *
+ * Description:
+ * Try to label a socket with the inode's SID using NetLabel.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int __selinux_netlbl_inode_permission(struct inode *inode, int mask)
+{
+	int rc;
+	struct socket *sock = SOCKET_I(inode);
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	lock_sock(sock->sk);
+	rc = selinux_netlbl_socket_setsid(sock, sksec->sid);
+	release_sock(sock->sk);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel
+ * @sksec: the sock's sk_security_struct
+ * @skb: the packet
+ * @ad: the audit data
+ *
+ * Description:
+ * Fetch the NetLabel security attributes from @skb and perform an access check
+ * against the receiving socket.  Returns zero on success, negative values on
+ * error.
+ *
+ */
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad)
+{
+	int rc;
+	u32 netlbl_sid;
+	u32 recv_perm;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid);
+	if (rc != 0)
+		return rc;
+
+	if (netlbl_sid == SECINITSID_UNLABELED)
+		return 0;
+
+	switch (sksec->sclass) {
+	case SECCLASS_UDP_SOCKET:
+		recv_perm = UDP_SOCKET__RECV_MSG;
+		break;
+	case SECCLASS_TCP_SOCKET:
+		recv_perm = TCP_SOCKET__RECV_MSG;
+		break;
+	default:
+		recv_perm = RAWIP_SOCKET__RECV_MSG;
+	}
+
+	rc = avc_has_perm(sksec->sid,
+			  netlbl_sid,
+			  sksec->sclass,
+			  recv_perm,
+			  ad);
+	if (rc == 0)
+		return 0;
+
+	netlbl_skbuff_err(skb, rc);
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_peersid - Return the peer SID of a connected socket
+ * @sock: the socket
+ *
+ * Description:
+ * Examine @sock to find the connected peer's SID.  Returns the SID on success
+ * or SECSID_NULL on error.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	if (sksec->peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return sksec->peer_sid;
+}
+
+/**
+ * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet
+ * @skb: the packet
+ *
+ * Description:
+ * Examine @skb to find the SID assigned to it by NetLabel.  Returns the SID on
+ * success, SECSID_NULL on error.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	int peer_sid;
+	struct sock *sk = skb->sk;
+	struct inode_security_struct *isec;
+
+	if (sk == NULL || sk->sk_socket == NULL)
+		return SECSID_NULL;
+
+	isec = SOCK_INODE(sk->sk_socket)->i_security;
+	if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0)
+		return SECSID_NULL;
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+#endif /* CONFIG_NETLABEL */
diff -puN security/selinux/xfrm.c~git-net security/selinux/xfrm.c
--- a/security/selinux/xfrm.c~git-net
+++ a/security/selinux/xfrm.c
@@ -6,7 +6,12 @@
  *  Authors:  Serge Hallyn <sergeh@us.ibm.com>
  *	      Trent Jaeger <jaegert@us.ibm.com>
  *
+ *  Updated: Venkat Yekkirala <vyekkirala@TrustedCS.com>
+ *
+ *           Granular IPSec Associations for use in MLS environments.
+ *
  *  Copyright (C) 2005 International Business Machines Corporation
+ *  Copyright (C) 2006 Trusted Computer Solutions, Inc.
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -67,10 +72,10 @@ static inline int selinux_authorizable_x
 }
 
 /*
- * LSM hook implementation that authorizes that a socket can be used
- * with the corresponding xfrm_sec_ctx and direction.
+ * LSM hook implementation that authorizes that a flow can use
+ * a xfrm policy rule.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
 	int rc = 0;
 	u32 sel_sid = SECINITSID_UNLABELED;
@@ -84,27 +89,130 @@ int selinux_xfrm_policy_lookup(struct xf
 		sel_sid = ctx->ctx_sid;
 	}
 
-	rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION,
-			  ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM :
-			   ((dir == FLOW_DIR_OUT) ?  ASSOCIATION__SENDTO :
-			    (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))),
+	rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
 			  NULL);
 
 	return rc;
 }
 
 /*
+ * LSM hook implementation that authorizes that a state matches
+ * the given policy, flow combo.
+ */
+
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
+			struct flowi *fl)
+{
+	u32 state_sid;
+	u32 pol_sid;
+	int err;
+
+	if (x->security)
+		state_sid = x->security->ctx_sid;
+	else
+		state_sid = SECINITSID_UNLABELED;
+
+	if (xp->security)
+		pol_sid = xp->security->ctx_sid;
+	else
+		pol_sid = SECINITSID_UNLABELED;
+
+	err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
+			  NULL);
+
+	if (err)
+		return 0;
+
+	return selinux_xfrm_flow_state_match(fl, x);
+}
+
+/*
+ * LSM hook implementation that authorizes that a particular outgoing flow
+ * can use a given security association.
+ */
+
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	int rc = 0;
+	u32 sel_sid = SECINITSID_UNLABELED;
+	struct xfrm_sec_ctx *ctx;
+
+	/* Context sid is either set to label or ANY_ASSOC */
+	if ((ctx = xfrm->security)) {
+		if (!selinux_authorizable_ctx(ctx))
+			return 0;
+
+		sel_sid = ctx->ctx_sid;
+	}
+
+	rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__SENDTO,
+			  NULL)? 0:1;
+
+	return rc;
+}
+
+/*
+ * LSM hook implementation that determines the sid for the session.
+ */
+
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+{
+	struct sec_path *sp;
+
+	*sid = SECSID_NULL;
+
+	if (skb == NULL)
+		return 0;
+
+	sp = skb->sp;
+	if (sp) {
+		int i, sid_set = 0;
+
+		for (i = sp->len-1; i >= 0; i--) {
+			struct xfrm_state *x = sp->xvec[i];
+			if (selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+
+				if (!sid_set) {
+					*sid = ctx->ctx_sid;
+					sid_set = 1;
+
+					if (!ckall)
+						break;
+				}
+				else if (*sid != ctx->ctx_sid)
+					return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
  * Security blob allocation for xfrm_policy and xfrm_state
  * CTX does not have a meaningful value on input
  */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx)
+static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
+	struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid)
 {
 	int rc = 0;
 	struct task_security_struct *tsec = current->security;
-	struct xfrm_sec_ctx *ctx;
+	struct xfrm_sec_ctx *ctx = NULL;
+	char *ctx_str = NULL;
+	u32 str_len;
+	u32 ctx_sid;
 
-	BUG_ON(!uctx);
-	BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX);
+	BUG_ON(uctx && pol);
+
+	if (!uctx)
+		goto not_from_user;
+
+	if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX)
+		return -EINVAL;
 
 	if (uctx->ctx_len >= PAGE_SIZE)
 		return -ENOMEM;
@@ -141,9 +249,43 @@ static int selinux_xfrm_sec_ctx_alloc(st
 
 	return rc;
 
+not_from_user:
+	if (pol) {
+		rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid);
+		if (rc)
+			goto out;
+	}
+	else
+		ctx_sid = sid;
+
+	rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len);
+	if (rc)
+		goto out;
+
+	*ctxp = ctx = kmalloc(sizeof(*ctx) +
+			      str_len,
+			      GFP_ATOMIC);
+
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	ctx->ctx_doi = XFRM_SC_DOI_LSM;
+	ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+	ctx->ctx_sid = ctx_sid;
+	ctx->ctx_len = str_len;
+	memcpy(ctx->ctx_str,
+	       ctx_str,
+	       str_len);
+
+	goto out2;
+
 out:
 	*ctxp = NULL;
 	kfree(ctx);
+out2:
+	kfree(ctx_str);
 	return rc;
 }
 
@@ -151,13 +293,23 @@ out:
  * LSM hook implementation that allocs and transfers uctx spec to
  * xfrm_policy.
  */
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *uctx, struct sock *sk)
 {
 	int err;
+	u32 sid;
 
 	BUG_ON(!xp);
+	BUG_ON(uctx && sk);
 
-	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx);
+	if (sk) {
+		struct sk_security_struct *ssec = sk->sk_security;
+		sid = ssec->sid;
+	}
+	else
+		sid = SECSID_NULL;
+
+	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid);
 	return err;
 }
 
@@ -217,13 +369,14 @@ int selinux_xfrm_policy_delete(struct xf
  * LSM hook implementation that allocs and transfers sec_ctx spec to
  * xfrm_state.
  */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
+		struct xfrm_sec_ctx *pol, u32 secid)
 {
 	int err;
 
 	BUG_ON(!x);
 
-	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx);
+	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid);
 	return err;
 }
 
@@ -329,38 +482,30 @@ int selinux_xfrm_state_delete(struct xfr
  * we need to check for unlabelled access since this may not have
  * gone thru the IPSec process.
  */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+				struct avc_audit_data *ad)
 {
 	int i, rc = 0;
 	struct sec_path *sp;
+	u32 sel_sid = SECINITSID_UNLABELED;
 
 	sp = skb->sp;
 
 	if (sp) {
-		/*
-		 * __xfrm_policy_check does not approve unless xfrm_policy_ok
-		 * says that spi's match for policy and the socket.
-		 *
-		 *  Only need to verify the existence of an authorizable sp.
-		 */
 		for (i = 0; i < sp->len; i++) {
 			struct xfrm_state *x = sp->xvec[i];
 
-			if (x && selinux_authorizable_xfrm(x))
-				goto accept;
+			if (x && selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+				sel_sid = ctx->ctx_sid;
+				break;
+			}
 		}
 	}
 
-	/* check SELinux sock for unlabelled access */
-	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__RECVFROM, NULL);
-	if (rc)
-		goto drop;
-
-accept:
-	return 0;
+	rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__RECVFROM, ad);
 
-drop:
 	return rc;
 }
 
@@ -371,7 +516,8 @@ drop:
  * If we do have a authorizable security association, then it has already been
  * checked in xfrm_policy_lookup hook.
  */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+					struct avc_audit_data *ad)
 {
 	struct dst_entry *dst;
 	int rc = 0;
@@ -391,7 +537,7 @@ int selinux_xfrm_postroute_last(u32 isec
 	}
 
 	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__SENDTO, NULL);
+			  ASSOCIATION__SENDTO, ad);
 out:
 	return rc;
 }
diff -puN usr/Makefile~git-net usr/Makefile
--- a/usr/Makefile~git-net
+++ a/usr/Makefile
@@ -37,6 +37,9 @@ quiet_cmd_initfs = GEN     $@
       cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input)
 
 targets := initramfs_data.cpio.gz
+# do not try to update files included in initramfs
+$(deps_initramfs): ;
+
 $(deps_initramfs): klibcdirs
 # We rebuild initramfs_data.cpio.gz if:
 # 1) Any included file is newer then initramfs_data.cpio.gz
diff -puN kernel/taskstats.c~git-net kernel/taskstats.c
--- a/kernel/taskstats.c~git-net
+++ a/kernel/taskstats.c
@@ -75,7 +75,7 @@ static int prepare_reply(struct genl_inf
 	/*
 	 * If new attributes are added, please revisit this allocation
 	 */
-	skb = nlmsg_new(size);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
_