Subject: [PATCH] First working snapshot of CLONE_NHOST From: Eric W. Biederman Date: 1133532763 -0700 ipv4 for (raw,udp,tcp) is roughly working. unix domain sockets are a noop. ipv6 doesn't work yet but it should just be a matter of adapting the changes from ipv4. netlink sockets sort of work but need to be redesigned. Bugfixes in ipc/util.h for sysvipc support. Bugfixes in kernel/fork.c for pidspace support (The error path needed additional cleanup). The code is structured so that after CLONE_NHOST, unless the appropriate flag has been set, address families and protocols by default will fail at socket creation time. This allows for a partial implementation. --- arch/i386/kernel/init_task.c | 1 arch/i386/kernel/process.c | 1 arch/i386/kernel/sys_i386.c | 1 arch/i386/kernel/traps.c | 1 drivers/char/random.c | 1 drivers/net/Space.c | 4 + drivers/net/loopback.c | 60 ++++++++++++--------- fs/binfmt_elf.c | 1 fs/exec.c | 1 fs/nfs/file.c | 22 +++++++- fs/nfs/nfsroot.c | 1 include/asm-i386/elf.h | 2 - include/linux/inetdevice.h | 11 +++- include/linux/lockd/lockd.h | 1 include/linux/net.h | 1 include/linux/netdevice.h | 1 include/linux/nethost.h | 78 +++++++++++++++++++++++++++ include/linux/netlink.h | 3 + include/linux/sched.h | 4 + include/linux/utsname.h | 46 ---------------- include/net/addrconf.h | 3 + include/net/if_inet6.h | 4 + include/net/inet_hashtables.h | 35 +++++++----- include/net/ip_fib.h | 6 +- include/net/protocol.h | 7 +- include/net/raw.h | 4 + include/net/route.h | 7 +- include/net/sock.h | 2 + include/net/tcp.h | 2 - include/net/udp.h | 2 - init/main.c | 1 init/version.c | 3 + ipc/util.h | 1 kernel/exit.c | 2 - kernel/fork.c | 5 +- kernel/sys.c | 1 kernel/sysctl.c | 1 net/core/Makefile | 2 - net/core/dst.c | 9 ++- net/core/host.c | 41 ++++++++++++++ net/core/rtnetlink.c | 1 net/ipv4/af_inet.c | 19 ++++++- net/ipv4/arp.c | 4 + net/ipv4/devinet.c | 111 ++++++++++++++++++++++++++++++++++----- net/ipv4/fib_frontend.c | 8 +-- net/ipv4/fib_semantics.c | 12 ++++
net/ipv4/icmp.c | 13 +++-- net/ipv4/inet_connection_sock.c | 1 net/ipv4/inet_hashtables.c | 6 ++ net/ipv4/ip_input.c | 3 + net/ipv4/ip_options.c | 11 ++-- net/ipv4/ipconfig.c | 10 ++-- net/ipv4/raw.c | 18 +++++- net/ipv4/route.c | 87 ++++++++++++++++++++----------- net/ipv4/tcp_ipv4.c | 16 ++++-- net/ipv4/udp.c | 19 +++++-- net/ipv4/xfrm4_policy.c | 13 ++++- net/ipv6/addrconf.c | 45 +++++++++++++--- net/ipv6/af_inet6.c | 6 ++ net/ipv6/inet6_hashtables.c | 4 + net/ipv6/ip6_flowlabel.c | 2 - net/ipv6/raw.c | 2 + net/ipv6/route.c | 17 +++--- net/ipv6/udp.c | 2 + net/ipv6/xfrm6_policy.c | 3 + net/netlink/af_netlink.c | 46 ++++++++++++++-- net/socket.c | 8 +++ net/sunrpc/clnt.c | 1 net/unix/af_unix.c | 2 + net/xfrm/xfrm_policy.c | 7 ++ 70 files changed, 640 insertions(+), 236 deletions(-) create mode 100644 include/linux/nethost.h create mode 100644 net/core/host.c b2416358cc35840ed9dc8aadc4b7549845815034 diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 4037eb2..68a6926 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c89bb2a..1ed4d62 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index 64dce92..0ec382b 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 674c7ea..936852f 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/random.c b/drivers/char/random.c index 33f07c2..006f435 100644 --- a/drivers/char/random.c +++ 
b/drivers/char/random.c @@ -224,6 +224,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 60304f7..9fc460a 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -35,6 +35,7 @@ #include #include #include +#include /* A unified ethernet device probe. This is the easiest way to have every ethernet adaptor have the name "eth[0123...]". @@ -355,14 +356,13 @@ static void __init trif_probe2(int unit) * The loopback device is global so it can be directly referenced * by the network code. Also, it must be first on device list. */ -extern int loopback_init(void); /* Statically configured drivers -- order matters here. */ static int __init net_olddevs_init(void) { int num; - if (loopback_init()) { + if (nethost_init()) { printk(KERN_ERR "Network loopback device setup failed\n"); } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 690a1aa..2e28791 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ #include #include #include +#include #include #include #include /* For the statistics structure. */ @@ -198,42 +200,46 @@ static struct ethtool_ops loopback_ethto .set_tso = ethtool_op_set_tso, }; -struct net_device loopback_dev = { - .name = "lo", - .mtu = (16 * 1024) + 20 + 20 + 12, - .hard_start_xmit = loopback_xmit, - .hard_header = eth_header, - .hard_header_cache = eth_header_cache, - .header_cache_update = eth_header_cache_update, - .hard_header_len = ETH_HLEN, /* 14 */ - .addr_len = ETH_ALEN, /* 6 */ - .tx_queue_len = 0, - .type = ARPHRD_LOOPBACK, /* 0x0001*/ - .rebuild_header = eth_rebuild_header, - .flags = IFF_LOOPBACK, - .features = NETIF_F_SG | NETIF_F_FRAGLIST -#ifdef LOOPBACK_TSO - | NETIF_F_TSO -#endif - | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA - | NETIF_F_LLTX, - .ethtool_ops = &loopback_ethtool_ops, -}; - /* Setup and register the loopback device. 
*/ -int __init loopback_init(void) +int loopback_init(struct net_device *dev) { struct net_device_stats *stats; + strcpy(dev->name, (dev == &init_host.loopback_dev)?"lo": "lo%d"); + dev->mtu = (16 * 1024) + 20 + 20 + 12, + dev->hard_start_xmit = loopback_xmit, + dev->hard_header = eth_header, + dev->hard_header_cache = eth_header_cache, + dev->header_cache_update = eth_header_cache_update, + dev->hard_header_len = ETH_HLEN, /* 14 */ + dev->addr_len = ETH_ALEN, /* 6 */ + dev->tx_queue_len = 0, + dev->type = ARPHRD_LOOPBACK, /* 0x0001*/ + dev->rebuild_header = eth_rebuild_header, + dev->flags = IFF_LOOPBACK, + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST +#ifdef LOOPBACK_TSO + | NETIF_F_TSO +#endif + | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA + | NETIF_F_LLTX, + dev->ethtool_ops = &loopback_ethtool_ops, + /* Can survive without statistics */ stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); if (stats) { memset(stats, 0, sizeof(struct net_device_stats)); - loopback_dev.priv = stats; - loopback_dev.get_stats = &get_stats; + dev->priv = stats; + dev->get_stats = &get_stats; } - return register_netdev(&loopback_dev); + return register_netdev(dev); }; -EXPORT_SYMBOL(loopback_dev); +void loopback_free(struct net_device *dev) +{ + struct net_device_stats *stats = dev->priv; + unregister_netdev(dev); + if (stats) + kfree(stats); +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 29b34f3..39d3ca2 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/fs/exec.c b/fs/exec.c index a3ac9c4..4d5af27 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1eaa166..50444a9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -406,9 +406,25 @@ static int do_vfs_lock(struct file *file default: BUG(); } - if (res < 0) - printk(KERN_WARNING "%s:%s: VFS is out of sync with lock 
manager!\n", - __FILE__, __func__); + if (res < 0) { + char *tmp = (char *)__get_free_page(GFP_KERNEL); + const char *path; + int len; + path = ERR_PTR(-ENOMEM); + if (tmp) { + path = d_path(file->f_dentry, file->f_vfsmnt, tmp, PAGE_SIZE); + } + if (!IS_ERR(path)) { + len = tmp + PAGE_SIZE - 1 - path; + } else { + path = file->f_dentry->d_name.name; + len = file->f_dentry->d_name.len; + } + printk(KERN_WARNING "%s:%s: VFS is out of sync with lock manager! tid: %d file: %*.*s\n", + __FILE__, __func__, current->tid, + len, len, path); + free_page((unsigned long)tmp); + } return res; } diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 98b477b..d769c09 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include #include diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 6b4d51d..8a54ed2 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -11,8 +11,6 @@ #include /* for savesegment */ #include -#include - #define R_386_NONE 0 #define R_386_32 1 #define R_386_PC32 2 diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fd7af86..e8b315c 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -84,10 +84,12 @@ struct in_device #define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) #define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) +struct nethost; struct in_ifaddr { struct in_ifaddr *ifa_next; struct in_device *ifa_dev; + struct nethost *ifa_host; /* Which host am I for? 
*/ struct rcu_head rcu_head; u32 ifa_local; u32 ifa_address; @@ -109,8 +111,13 @@ extern int devinet_ioctl(unsigned int c extern void devinet_init(void); extern struct in_device *inetdev_init(struct net_device *dev); extern struct in_device *inetdev_by_index(int); -extern u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope); -extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope); +extern u32 inet_select_addr(struct nethost *host, + const struct net_device *dev, u32 dst, + int scope); +extern u32 inet_confirm_addr(struct nethost *host, + const struct net_device *dev, u32 dst, + u32 local, int scope); +extern struct nethost * inet_dev_host(int ifindex, u32 addr); extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask); extern void inet_forward_change(void); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 52768c2..c988c26 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/net.h b/include/linux/net.h index 4e98158..5994caa 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -167,6 +167,7 @@ struct net_proto_family { short authentication; short encryption; short encrypt_net; + unsigned multi_host : 1; /* Flag indicating multiple host support */ struct module *owner; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 368e4c8..3b3d5aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -533,7 +533,6 @@ struct packet_type { #include #include -extern struct net_device loopback_dev; /* The loopback */ extern struct net_device *dev_base; /* All devices */ extern rwlock_t dev_base_lock; /* Device list lock */ diff --git a/include/linux/nethost.h b/include/linux/nethost.h new file mode 100644 index 0000000..1c55185 --- /dev/null +++ b/include/linux/nethost.h @@ -0,0 +1,78 @@ 
+#ifndef _LINUX_NETHOST_H +#define _LINUX_NETHOST_H + +#include +#include +#include +#include +#include + +struct nethost { + atomic_t count; + struct new_utsname utsname; +#ifdef CONFIG_NET + struct net_device loopback_dev; +#endif +}; + +extern struct nethost init_host; +extern int loopback_init(struct net_device *loopback_dev); +extern void loopback_free(struct net_device *loopback_dev); + +#ifdef CONFIG_NET + +extern void __put_host(struct nethost *host); +extern int __copy_host(int flags, struct task_struct *p); +extern int nethost_init(void); + +#else /* ! CONFIG_NET */ +static inline void __put_host(struct nethost *host) +{ + kfree(host); +} + +static inline void __copy_host(int flags, struct task_struct *p) +{ + struct nethost *host; + host = kmalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return -ENOMEM; + atomic_set(&host->count, 1); + down_read(&uts_sem); + memcpy(&host->utsname, &p->host->utsname, sizeof(host->utsname)); + up_read(&uts_sem); + p->host = host; +} + +#endif /* CONFIG_NET */ + +static inline void get_host(struct nethost *host) +{ + atomic_inc(&host->count); +} + +static inline void put_host(struct nethost *host) +{ + if (atomic_dec_and_test(&host->count)) + __put_host(host); +} + +static inline int copy_host(int flags, struct task_struct *p) +{ + int ret = 0; + if (likely(!(flags & CLONE_NHOST))) { + get_host(p->host); + } else { + ret = __copy_host(flags, p); + } + return ret; +} + +static inline void exit_host(struct task_struct *tsk) +{ + struct nethost *host = tsk->host; + tsk->host = NULL; + put_host(host); +} + +#endif /* _LINUX_NETHOST_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index ba25ca8..9a3d6bd 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -29,7 +29,7 @@ struct sockaddr_nl sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ __u32 nl_pid; /* process pid */ - __u32 nl_groups; /* multicast groups mask */ + __u32 nl_groups; /* multicast groups mask */ }; 
struct nlmsghdr @@ -210,6 +210,7 @@ extern int netlink_dump_start(struct soc #define NL_NONROOT_RECV 0x1 #define NL_NONROOT_SEND 0x2 extern void netlink_set_nonroot(int protocol, unsigned flag); +extern void netlink_set_multihost(int protocol); #endif /* __KERNEL__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d112af..e9c3124 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -233,7 +233,7 @@ asmlinkage void schedule(void); struct namespace; struct pspace; struct ipc_ns; -struct host; +struct nethost; /* Maximum number of active map areas.. This is a random (large) number */ #define DEFAULT_MAX_MAP_COUNT 65536 @@ -760,7 +760,7 @@ struct task_struct { /* ipc namespace */ struct ipc_ns *ipc; /* network namespace */ - struct host *host; + struct nethost *host; /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 5e59dd0..9b9e533 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -32,50 +32,4 @@ struct new_utsname { extern struct rw_semaphore uts_sem; -#include -#include - -struct host { - atomic_t count; - struct new_utsname utsname; -}; - -extern struct host init_host; - -static inline void get_host(struct host *host) -{ - atomic_inc(&host->count); -} - -static inline void put_host(struct host *host) -{ - if (atomic_dec_and_test(&host->count)) - kfree(host); -} - -static inline int copy_host(int flags, struct task_struct *p) -{ - if (likely(!(flags & CLONE_NHOST))) { - get_host(p->host); - } else { - struct host *host; - host = kmalloc(sizeof(*host), GFP_KERNEL); - if (!host) - return -ENOMEM; - atomic_set(&host->count, 1); - down_read(&uts_sem); - memcpy(&host->utsname, &p->host->utsname, sizeof(host->utsname)); - up_read(&uts_sem); - p->host = host; - } - return 0; -} - -static inline void exit_host(struct task_struct *tsk) -{ - struct host *host = tsk->host; - tsk->host = NULL; - put_host(host); -} - #endif /* 
_LINUX_UTSNAME_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 750e250..06c3eba 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -62,6 +62,9 @@ extern int addrconf_set_dstaddr(void _ extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); +extern int ipv6_host_match(const struct sock *sk, + const int dif, + const struct in6_addr *addr); extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index e97a9ac..e349724 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -31,6 +31,8 @@ #ifdef __KERNEL__ +struct nethost; + struct inet6_ifaddr { struct in6_addr addr; @@ -56,6 +58,8 @@ struct inet6_ifaddr struct inet6_ifaddr *lst_next; /* next addr in addr_lst */ struct inet6_ifaddr *if_next; /* next addr in inet6_dev */ + struct nethost *host; /* Which host am I for? */ + #ifdef CONFIG_IPV6_PRIVACY struct inet6_ifaddr *tmp_next; /* next addr in tempaddr_lst */ struct inet6_ifaddr *ifpub; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f50f959..d27f9f6 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -296,13 +296,15 @@ static inline int inet_iif(const struct extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, - const int dif); + const int dif, + const struct nethost *host); /* Optimize the common listener case. 
*/ static inline struct sock * inet_lookup_listener(struct inet_hashinfo *hashinfo, const u32 daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, + struct nethost *host) { struct sock *sk = NULL; const struct hlist_head *head; @@ -315,9 +317,10 @@ static inline struct sock * if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) + !sk->sk_bound_dev_if && + (!host || host == sk->sk_host)) goto sherry_cache; - sk = __inet_lookup_listener(head, daddr, hnum, dif); + sk = __inet_lookup_listener(head, daddr, hnum, dif, host); } if (sk) { sherry_cache: @@ -344,11 +347,12 @@ sherry_cache: #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __host)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ + ((__host) == (__sk)->sk_host)) #define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ @@ -356,12 +360,13 @@ sherry_cache: (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __host)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) 
&& \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ + ((__host) == (__sk)->sk_host)) #define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ @@ -380,7 +385,7 @@ static inline struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, const u32 daddr, const u16 hnum, - const int dif) + const int dif, struct nethost *host) { INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(sport, hnum); @@ -395,7 +400,7 @@ static inline struct sock * prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, host)) goto hit; /* You sunk my battleship! */ } @@ -416,22 +421,22 @@ hit: static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, const u32 daddr, const u16 hnum, - const int dif) + const int dif, struct nethost *host) { struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, - hnum, dif); - return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); + hnum, dif, host); + return sk ? 
: inet_lookup_listener(hashinfo, daddr, hnum, dif, host); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, const u32 daddr, const u16 dport, - const int dif) + const int dif, struct nethost *host) { struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, host); local_bh_enable(); return sk; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14de4eb..0cd66a4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -136,7 +136,7 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __fib_res_prefsrc(&res)) +#define FIB_RES_PREFSRC(host, res) (fib_res_prefsrc(host, &res)) #define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) @@ -234,7 +234,7 @@ extern int inet_rtm_delroute(struct sk_b extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb); -extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, +extern int fib_validate_source(struct nethost *host, u32 src, u32 dst, u8 tos, int oif, struct net_device *dev, u32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); @@ -244,7 +244,7 @@ extern int fib_sync_down(u32 local, stru extern int fib_sync_up(struct net_device *dev); extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, struct kern_rta *rta, struct rtentry *r); -extern u32 __fib_res_prefsrc(struct fib_result *res); +extern u32 fib_res_prefsrc(struct nethost *host, struct fib_result *res); /* Exported by fib_hash.c */ extern struct 
fib_table *fib_hash_init(int id); diff --git a/include/net/protocol.h b/include/net/protocol.h index 357691f..889b561 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,9 +35,9 @@ /* This is used to register protocols. */ struct net_protocol { - int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, u32 info); - int no_policy; + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, struct nethost *host, u32 info); + int no_policy; }; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) @@ -76,6 +76,7 @@ struct inet_protosw { }; #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ +#define INET_PROTOSW_MULTIHOST 0x04 /* Safe to use after CLONE_NHOST? */ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff --git a/include/net/raw.h b/include/net/raw.h index f479174..a5f9ba5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -32,10 +32,10 @@ extern struct hlist_head raw_v4_htable[R extern rwlock_t raw_v4_lock; - +struct nethost; extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, - int dif); + int dif, const struct nethost *host); extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); diff --git a/include/net/route.h b/include/net/route.h index dbe79ca..fff3acd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -75,6 +75,7 @@ struct rtable /* Miscellaneous cached information */ __u32 rt_spec_dst; /* RFC1122 specific destination */ struct inet_peer *peer; /* long-living peer info */ + struct nethost *rt_host; /* Only used on local input routes */ }; struct ip_rt_acct @@ -113,7 +114,7 @@ extern void ip_rt_redirect(u32 old_gw, u32 src, u8 tos, struct net_device *dev); extern void ip_rt_advice(struct rtable **rp, int advice); extern void
rt_cache_flush(int how); -extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); +extern int __ip_route_output_key(struct nethost *host, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); @@ -123,7 +124,7 @@ extern void ip_rt_send_redirect(struct extern unsigned inet_addr_type(u32 addr); extern void ip_rt_multicast_event(struct in_device *); extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); -extern void ip_rt_get_source(u8 *src, struct rtable *rt); +extern void ip_rt_get_source(struct nethost *host, u8 *src, struct rtable *rt); extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); static inline void ip_rt_put(struct rtable * rt) @@ -156,7 +157,7 @@ static inline int ip_route_connect(struc int err; if (!dst || !src) { - err = __ip_route_output_key(rp, &fl); + err = __ip_route_output_key(sk->sk_host, rp, &fl); if (err) return err; fl.fl4_dst = (*rp)->rt_dst; diff --git a/include/net/sock.h b/include/net/sock.h index ecb7552..2c9136e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -117,6 +117,7 @@ struct sock_common { struct proto *skc_prot; }; +struct nethost; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -243,6 +244,7 @@ struct sock { struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; int sk_write_pending; + struct nethost *sk_host; void *sk_security; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); diff --git a/include/net/tcp.h b/include/net/tcp.h index c24339c..f8d3451 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -302,7 +302,7 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_st #define TCP_ADD_STATS_BH(field, val) 
SNMP_ADD_STATS_BH(tcp_statistics, field, val) #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) -extern void tcp_v4_err(struct sk_buff *skb, u32); +extern void tcp_v4_err(struct sk_buff *skb, struct nethost *host, u32); extern void tcp_shutdown (struct sock *sk, int how); diff --git a/include/net/udp.h b/include/net/udp.h index 107b9d7..b861bf4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -63,7 +63,7 @@ static inline int udp_lport_inuse(u16 nu extern struct proto udp_prot; -extern void udp_err(struct sk_buff *, u32); +extern void udp_err(struct sk_buff *, struct nethost *host, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); diff --git a/init/main.c b/init/main.c index b2342eb..f8a381e 100644 --- a/init/main.c +++ b/init/main.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/init/version.c b/init/version.c index e2d5226..66e2592 100644 --- a/init/version.c +++ b/init/version.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #define version(a) Version_ ## a @@ -17,7 +18,7 @@ int version_string(LINUX_VERSION_CODE); -struct host init_host = { +struct nethost init_host = { .count = ATOMIC_INIT(1), .utsname = { .sysname = UTS_SYSNAME, diff --git a/ipc/util.h b/ipc/util.h index 8523aff..a426379 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -53,6 +53,7 @@ void ipc_init_ids(struct ipc_ids* ids, i void ipc_free_ids(struct ipc_ids *ids, void (*ipc_free)(struct kern_ipc_perm *p)); #ifdef CONFIG_PROC_FS +struct seq_operations; extern void *ipc_proc_next(struct seq_file *s, void *it, loff_t *pos); extern void *ipc_proc_start(struct seq_file *s, loff_t *pos); extern void ipc_proc_stop(struct seq_file *s, void *it); diff --git a/kernel/exit.c b/kernel/exit.c index 919897d..82c49af 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git 
a/kernel/fork.c b/kernel/fork.c index afec7ac..0cbba91 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include @@ -1217,6 +1217,9 @@ bad_fork_cleanup_policy: mpol_free(p->mempolicy); #endif bad_fork_cleanup_pspace: + if (p->wid != p->tid) { + free_pidmap(p->pspace, p->tid); + } exit_pspace(p); bad_fork_cleanup: if (p->binfmt) diff --git a/kernel/sys.c b/kernel/sys.c index 46bc2d5..3d6e1e8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 848c35d..443fbd9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include diff --git a/net/core/Makefile b/net/core/Makefile index 630da0f..3393a46 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o host.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o diff --git a/net/core/dst.c b/net/core/dst.c index 470c05b..5a8e753 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -241,13 +242,13 @@ static inline void dst_ifdown(struct dst dst->input = dst_discard_in; dst->output = dst_discard_out; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &init_host.loopback_dev; + dev_hold(&init_host.loopback_dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = &init_host.loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(&init_host.loopback_dev); } } } diff --git a/net/core/host.c b/net/core/host.c new file mode 100644 index 0000000..3b8bc93 --- /dev/null +++ 
b/net/core/host.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +void __put_host(struct nethost *host) +{ + loopback_free(&host->loopback_dev); + kfree(host); +} + +int __copy_host(int flags, struct task_struct *p) +{ + struct nethost *host; + int result; + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return -ENOMEM; + atomic_set(&host->count, 1); + down_read(&uts_sem); + memcpy(&host->utsname, &p->host->utsname, sizeof(host->utsname)); + up_read(&uts_sem); + result = loopback_init(&host->loopback_dev); + if (result != 0) { + kfree(host); + } else { + struct net_device *dev = &host->loopback_dev; + rtnl_shlock(); + if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) + printk(KERN_WARNING "clone: Failed to bring up %s\n", dev->name); + rtnl_shunlock(); + p->host = host; + } + return result; +} + +__init int nethost_init(void) +{ + return loopback_init(&init_host.loopback_dev); +} diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6c90700..0e2f0c5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -713,6 +713,7 @@ void __init rtnetlink_init(void) if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); + netlink_set_multihost(NETLINK_ROUTE); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; rtnetlink_links[PF_PACKET] = link_rtnetlink_table; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a9d84f9..44273ee 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include @@ -278,6 +279,9 @@ lookup_protocol: goto out_rcu_unlock; } + if ((current->host != &init_host) && + !(answer->flags & INET_PROTOSW_MULTIHOST)) + goto out_rcu_unlock; err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; @@ -299,6 +303,7 @@ lookup_protocol: goto out; err = 0; + sk->sk_host = current->host; 
sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = 1; @@ -428,6 +433,13 @@ int inet_bind(struct socket *sock, struc chk_addr_ret != RTN_BROADCAST) goto out; + /* Verify local addresses are for the current host */ + /* FIXME do I need to handle inet->freebind and sysctl_ip_nonlocal_bind here? */ + if ((chk_addr_ret == RTN_LOCAL) && + !inet_confirm_addr(sk->sk_host, NULL, 0, addr->sin_addr.s_addr, + RT_SCOPE_UNIVERSE)) + goto out; + snum = ntohs(addr->sin_port); err = -EACCES; if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) @@ -856,6 +868,7 @@ static struct proto_ops inet_sockraw_ops static struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, + .multi_host = 1, .owner = THIS_MODULE, }; @@ -871,7 +884,7 @@ static struct inet_protosw inetsw_array[ .ops = &inet_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_MULTIHOST, }, { @@ -881,7 +894,7 @@ static struct inet_protosw inetsw_array[ .ops = &inet_dgram_ops, .capability = -1, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_MULTIHOST, }, @@ -892,7 +905,7 @@ static struct inet_protosw inetsw_array[ .ops = &inet_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, + .flags = INET_PROTOSW_REUSE | INET_PROTOSW_MULTIHOST, } }; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748..9586d33 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -363,7 +363,7 @@ static void arp_solicit(struct neighbour if (in_dev) in_dev_put(in_dev); if (!saddr) - saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); + saddr = inet_select_addr(NULL, dev, target, RT_SCOPE_LINK); if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state&NUD_VALID)) @@ -416,7 +416,7 @@ static int arp_ignore(struct in_device * default: return 0; } - return 
!inet_confirm_addr(dev, sip, tip, scope); + return !inet_confirm_addr(NULL, dev, sip, tip, scope); } static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e6949fa..bebe427 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -57,6 +57,7 @@ #include #endif #include +#include #include #include @@ -180,6 +181,34 @@ static void in_dev_rcu_put(struct rcu_he in_dev_put(idev); } +static void inethost_destroy(struct nethost *host) +{ + /* FIXME is the lock correct in this function? */ + struct net_device *dev; + read_lock(&dev_base_lock); + rcu_read_lock(); + for (dev = dev_base; dev; dev = dev->next) { + struct in_device *in_dev; + struct in_ifaddr *ifa, **ifap; + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) + continue; + + ifap = &in_dev->ifa_list; + while ((ifa = *ifap) != NULL) { + + if (ifa->ifa_host != host) { + ifap = &ifa->ifa_next; + continue; + } + + inet_del_ifa(in_dev, ifap, 0); + inet_free_ifa(ifa); + } + } + rcu_read_unlock(); + read_unlock(&dev_base_lock); +} + static void inetdev_destroy(struct in_device *in_dev) { struct in_ifaddr *ifa; @@ -188,7 +217,7 @@ static void inetdev_destroy(struct in_de ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) + if (dev == &init_host.loopback_dev) return; in_dev->dead = 1; @@ -413,7 +442,9 @@ static int inet_rtm_deladdr(struct sk_bu for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if ((rta[IFA_LOCAL - 1] && + if ((ifa->ifa_host && + skb->sk->sk_host != ifa->ifa_host) || + (rta[IFA_LOCAL - 1] && memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), &ifa->ifa_local, 4)) || (rta[IFA_LABEL - 1] && @@ -458,6 +489,7 @@ static int inet_rtm_newaddr(struct sk_bu if ((ifa = inet_alloc_ifa()) == NULL) goto out; + ifa->ifa_host = skb->sk->sk_host; if (!rta[IFA_ADDRESS - 1]) rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); @@ -594,6 +626,8 @@ int devinet_ioctl(unsigned int 
cmd, void for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + (!ifa->ifa_host || + ifa->ifa_host == current->host) && sin_orig.sin_addr.s_addr == ifa->ifa_address) { break; /* found */ @@ -606,7 +640,9 @@ int devinet_ioctl(unsigned int cmd, void if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) - if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + (!ifa->ifa_host || + ifa->ifa_host == current->host)) break; } } @@ -654,6 +690,7 @@ int devinet_ioctl(unsigned int cmd, void ret = -ENOBUFS; if ((ifa = inet_alloc_ifa()) == NULL) break; + ifa->ifa_host = current->host; if (colon) memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); else @@ -785,7 +822,7 @@ out: return done; } -u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) +u32 inet_select_addr(struct nethost *host, const struct net_device *dev, u32 dst, int scope) { u32 addr = 0; struct in_device *in_dev; @@ -796,6 +833,8 @@ u32 inet_select_addr(const struct net_de goto no_in_dev; for_primary_ifa(in_dev) { + if (ifa->ifa_host && host && ifa->ifa_host != host) + continue; if (ifa->ifa_scope > scope) continue; if (!dst || inet_ifa_match(dst, ifa)) { @@ -822,6 +861,8 @@ no_in_dev: continue; for_primary_ifa(in_dev) { + if (ifa->ifa_host && host && ifa->ifa_host != host) + continue; if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { addr = ifa->ifa_local; @@ -836,13 +877,15 @@ out: return addr; } -static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, - u32 local, int scope) +static u32 confirm_addr_indev(struct nethost *host, struct in_device *in_dev, + u32 dst, u32 local, int scope) { int same = 0; u32 addr = 0; for_ifa(in_dev) { + if (ifa->ifa_host && host && ifa->ifa_host != host) + continue; if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) { @@ -875,12 +918,15 @@ static u32 confirm_addr_indev(struct 
in_ /* * Confirm that local IP address exists using wildcards: - * - dev: only on this interface, 0=any interface + * - host: logical host, NULL for any host + * - dev: only on this interface, NULL=any interface * - dst: only in the same subnet as dst, 0=any dst * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope) +u32 inet_confirm_addr(struct nethost *host, const struct net_device *dev, + u32 dst, u32 local, int scope) + { u32 addr = 0; struct in_device *in_dev; @@ -888,7 +934,7 @@ u32 inet_confirm_addr(const struct net_d if (dev) { rcu_read_lock(); if ((in_dev = __in_dev_get_rcu(dev))) - addr = confirm_addr_indev(in_dev, dst, local, scope); + addr = confirm_addr_indev(host, in_dev, dst, local, scope); rcu_read_unlock(); return addr; @@ -898,7 +944,7 @@ u32 inet_confirm_addr(const struct net_d rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { if ((in_dev = __in_dev_get_rcu(dev))) { - addr = confirm_addr_indev(in_dev, dst, local, scope); + addr = confirm_addr_indev(host, in_dev, dst, local, scope); if (addr) break; } @@ -909,6 +955,37 @@ u32 inet_confirm_addr(const struct net_d return addr; } +/** + * inet_dev_host - Find the host for a device, interface pair + * @ifindex: index of device + * @addr: ip address on interface for host + */ +struct nethost *inet_dev_host(int ifindex, u32 addr) +{ + struct net_device *dev; + struct in_device *in_dev; + struct nethost *host = NULL; /* ERR_PTR(-EADDRNOTAVAIL); */ + dev = dev_get_by_index(ifindex); + if (!dev) + goto out; + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev))) { + for_ifa(in_dev) { + if (ifa->ifa_address == addr) { + host = ifa->ifa_host; + break; + } + } endfor_ifa(in_dev); + } + rcu_read_unlock(); + if (!host && dev->type == ARPHRD_LOOPBACK) { + host = container_of(dev, struct nethost, loopback_dev); + } + dev_put(dev); +out: + return host; +} + 
/* * Device notifier */ @@ -962,7 +1039,7 @@ static int inetdev_event(struct notifier ASSERT_RTNL(); if (!in_dev) { - if (event == NETDEV_REGISTER && dev == &loopback_dev) { + if (event == NETDEV_REGISTER && dev->type == ARPHRD_LOOPBACK) { in_dev = inetdev_init(dev); if (!in_dev) panic("devinet: Failed to create loopback\n"); @@ -980,9 +1057,10 @@ static int inetdev_event(struct notifier case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev->type == ARPHRD_LOOPBACK) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { + ifa->ifa_host = container_of(dev, struct nethost, loopback_dev); ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; @@ -1004,6 +1082,9 @@ static int inetdev_event(struct notifier break; /* MTU falled under 68, disable IP */ case NETDEV_UNREGISTER: + if (dev->type == ARPHRD_LOOPBACK) { + inethost_destroy(container_of(dev, struct nethost, loopback_dev)); + } inetdev_destroy(in_dev); break; case NETDEV_CHANGENAME: @@ -1084,8 +1165,10 @@ static int inet_dump_ifaddr(struct sk_bu } for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; - ifa = ifa->ifa_next, ip_idx++) { - if (ip_idx < s_ip_idx) + ifa = ifa->ifa_next) { + if (ifa->ifa_host && ifa->ifa_host != skb->sk->sk_host) + continue; + if (ip_idx++ < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e61bc71..20a11c9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -158,7 +158,7 @@ unsigned inet_addr_type(u32 addr) - check, that packet arrived from expected physical interface. 
*/ -int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, +int fib_validate_source(struct nethost *host, u32 src, u32 dst, u8 tos, int oif, struct net_device *dev, u32 *spec_dst, u32 *itag) { struct in_device *in_dev; @@ -187,7 +187,7 @@ int fib_validate_source(u32 src, u32 dst goto last_resort; if (res.type != RTN_UNICAST) goto e_inval_res; - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(host, res); fib_combine_itag(itag, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) @@ -209,7 +209,7 @@ int fib_validate_source(u32 src, u32 dst ret = 0; if (fib_lookup(&fl, &res) == 0) { if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(host, res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } fib_res_put(&res); @@ -219,7 +219,7 @@ int fib_validate_source(u32 src, u32 dst last_resort: if (rpf) goto e_inval; - *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + *spec_dst = inet_select_addr(host, dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1107961..15a3972 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -932,9 +932,17 @@ out_fill_res: /* Find appropriate source address to this destination */ -u32 __fib_res_prefsrc(struct fib_result *res) +u32 fib_res_prefsrc(struct nethost *host, struct fib_result *res) { - return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); + u32 addr = 0; + if (res->fi->fib_prefsrc) { + addr = res->fi->fib_prefsrc; + if (host && host != inet_dev_host(FIB_RES_DEV(*res)->ifindex, addr)) + addr = 0; + } + if (!addr) + addr = inet_select_addr(host, FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); + return addr; } int diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 90dca71..cbbd593 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -513,8 +513,11 @@ void icmp_send(struct sk_buff *skb_in, i saddr = iph->daddr; if 
(!(rt->rt_flags & RTCF_LOCAL)) { - if (sysctl_icmp_errors_use_inbound_ifaddr) - saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK); + if (sysctl_icmp_errors_use_inbound_ifaddr) { + struct nethost *host; + host = inet_dev_host(skb_in->dev->ifindex, iph->saddr); + saddr = inet_select_addr(host, skb_in->dev, 0, RT_SCOPE_LINK); + } else saddr = 0; } @@ -601,6 +604,7 @@ static void icmp_unreach(struct sk_buff int hash, protocol; struct net_protocol *ipprot; struct sock *raw_sk; + struct nethost *host; u32 info = 0; /* @@ -690,6 +694,7 @@ static void icmp_unreach(struct sk_buff iph = (struct iphdr *)skb->data; protocol = iph->protocol; + host = inet_dev_host(skb->dev->ifindex, iph->saddr); /* * Deliver ICMP message to raw sockets. Pretty useless feature? @@ -701,7 +706,7 @@ static void icmp_unreach(struct sk_buff if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, iph->saddr, - skb->dev->ifindex)) != NULL) { + skb->dev->ifindex, host)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = sk_next(raw_sk); iph = (struct iphdr *)skb->data; @@ -712,7 +717,7 @@ static void icmp_unreach(struct sk_buff rcu_read_lock(); ipprot = rcu_dereference(inet_protos[hash]); if (ipprot && ipprot->err_handler) - ipprot->err_handler(skb, info); + ipprot->err_handler(skb, host, info); rcu_read_unlock(); out: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 94468a7..575f8c8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -46,6 +46,7 @@ static inline int inet_csk_bind_conflict sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && + sk->sk_host == sk2->sk_host && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe..ce97399 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -18,6 +18,7 @@ #include #include #include 
+#include #include #include @@ -128,7 +129,8 @@ EXPORT_SYMBOL(inet_listen_wlock); * wildcarded during the search since they can never be otherwise. */ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, + const struct nethost *host) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -151,6 +153,8 @@ struct sock *__inet_lookup_listener(cons continue; score += 2; } + if (host && host != sk->sk_host) + continue; if (score == 5) return sk; if (score > hiscore) { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 473d0f2..4d4dcee 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -265,6 +265,9 @@ static inline int ip_local_deliver_finis */ int ip_local_deliver(struct sk_buff *skb) { + WARN_ON(!MULTICAST(skb->nh.iph->daddr) && + (skb->nh.iph->daddr != 0xffffffff) && + !(((struct rtable *)skb->dst)->rt_host)); /* * Reassemble IP fragments. 
*/ diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index bce4e87..11a03c9 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -38,6 +38,7 @@ void ip_options_build(struct sk_buff * s u32 daddr, struct rtable *rt, int is_frag) { unsigned char * iph = skb->nh.raw; + struct sock *sk = skb->sk; memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); @@ -49,9 +50,9 @@ void ip_options_build(struct sk_buff * s if (!is_frag) { if (opt->rr_needaddr) - ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); + ip_rt_get_source(sk->sk_host, iph+opt->rr+iph[opt->rr+2]-5, rt); if (opt->ts_needaddr) - ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); + ip_rt_get_source(sk->sk_host, iph+opt->ts+iph[opt->ts+2]-9, rt); if (opt->ts_needtime) { struct timeval tv; __u32 midtime; @@ -549,7 +550,7 @@ void ip_forward_options(struct sk_buff * if (opt->rr_needaddr) { optptr = (unsigned char *)raw + opt->rr; - ip_rt_get_source(&optptr[optptr[2]-5], rt); + ip_rt_get_source(NULL, &optptr[optptr[2]-5], rt); opt->is_changed = 1; } if (opt->srr_is_hit) { @@ -568,14 +569,14 @@ void ip_forward_options(struct sk_buff * } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; - ip_rt_get_source(&optptr[srrptr-1], rt); + ip_rt_get_source(NULL, &optptr[srrptr-1], rt); skb->nh.iph->daddr = rt->rt_dst; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! 
Destination lost!\n"); if (opt->ts_needaddr) { optptr = raw + opt->ts; - ip_rt_get_source(&optptr[optptr[2]-9], rt); + ip_rt_get_source(NULL, &optptr[optptr[2]-9], rt); opt->is_changed = 1; } } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ce08074..5c30abf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -180,18 +181,19 @@ static struct net_device *ic_dev __initd static int __init ic_open_devs(void) { struct ic_device *d, **last; - struct net_device *dev; + struct net_device *dev, *loopback_dev; unsigned short oflags; last = &ic_first_dev; rtnl_shlock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + loopback_dev = &current->host->loopback_dev; + if (dev_change_flags(loopback_dev, loopback_dev->flags | IFF_UP) < 0) + printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev->name); for (dev = dev_base; dev; dev = dev->next) { - if (dev == &loopback_dev) + if (dev == loopback_dev) continue; if (user_dev_name[0] ? 
!strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b0d7e4..f25f0d6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -104,7 +104,7 @@ static void raw_v4_unhash(struct sock *s struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, - int dif) + int dif, const struct nethost *host) { struct hlist_node *node; @@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && !(inet->rcv_saddr && inet->rcv_saddr != laddr) && - !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) && + !(host && host != sk->sk_host)) goto found; /* gotcha */ } sk = NULL; @@ -154,15 +155,17 @@ int raw_v4_input(struct sk_buff *skb, st { struct sock *sk; struct hlist_head *head; + const struct nethost *host; int delivered = 0; read_lock(&raw_v4_lock); head = &raw_v4_htable[hash]; if (hlist_empty(head)) goto out; + host = inet_dev_host(skb->dev->ifindex, iph->daddr); sk = __raw_v4_lookup(__sk_head(head), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, host); while (sk) { delivered = 1; @@ -175,7 +178,7 @@ int raw_v4_input(struct sk_buff *skb, st } sk = __raw_v4_lookup(sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, host); } out: read_unlock(&raw_v4_lock); @@ -555,6 +558,13 @@ static int raw_bind(struct sock *sk, str if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; + + /* Verify local addresses are for the current host */ + if ((chk_addr_ret == RTN_LOCAL) && + !inet_confirm_addr(sk->sk_host, NULL, 0, addr->sin_addr.s_addr, + RT_SCOPE_UNIVERSE)) + goto out; + inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; 
/* Use device */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 381dd6a..22076b4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -1483,8 +1484,8 @@ static void ipv4_dst_ifdown(struct dst_e { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + if (dev != &init_host.loopback_dev && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(&init_host.loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1521,7 +1522,7 @@ static int ip_rt_bug(struct sk_buff *skb in IP options! */ -void ip_rt_get_source(u8 *addr, struct rtable *rt) +void ip_rt_get_source(struct nethost *host, u8 *addr, struct rtable *rt) { u32 src; struct fib_result res; @@ -1529,10 +1530,10 @@ void ip_rt_get_source(u8 *addr, struct r if (rt->fl.iif == 0) src = rt->rt_src; else if (fib_lookup(&rt->fl, &res) == 0) { - src = FIB_RES_PREFSRC(res); + src = FIB_RES_PREFSRC(host, res); fib_res_put(&res); } else - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + src = inet_select_addr(host, rt->u.dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); memcpy(addr, &src, 4); } @@ -1610,8 +1611,8 @@ static int ip_route_input_mc(struct sk_b if (ZERONET(saddr)) { if (!LOCAL_MCAST(daddr)) goto e_inval; - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - } else if (fib_validate_source(saddr, 0, tos, 0, + spec_dst = inet_select_addr(NULL, dev, 0, RT_SCOPE_LINK); + } else if (fib_validate_source(NULL, saddr, 0, tos, 0, dev, &spec_dst, &itag) < 0) goto e_inval; @@ -1638,13 +1639,14 @@ static int ip_route_input_mc(struct sk_b #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &init_host.loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; 
rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_type = RTN_MULTICAST; + rth->rt_host = NULL; rth->rt_flags = RTCF_MULTICAST; if (our) { rth->u.dst.input= ip_local_deliver; @@ -1725,7 +1727,7 @@ static inline int __mkroute_input(struct } - err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), + err = fib_validate_source(NULL, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, &spec_dst, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, @@ -1916,6 +1918,7 @@ static int ip_route_input_slow(struct sk u32 spec_dst; int err = -EINVAL; int free_res = 0; + struct nethost *host = NULL; /* IP on this device is disabled. */ @@ -1958,8 +1961,11 @@ static int ip_route_input_slow(struct sk if (res.type == RTN_LOCAL) { int result; - result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + host = inet_dev_host(dev->ifindex, daddr); + if (!host) + goto martian_destination; + result = fib_validate_source(host, saddr, daddr, tos, + host->loopback_dev.ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -1990,11 +1996,12 @@ brd_input: if (skb->protocol != htons(ETH_P_IP)) goto e_inval; + host = inet_dev_host(dev->ifindex, daddr); if (ZERONET(saddr)) - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); + spec_dst = inet_select_addr(host, dev, 0, RT_SCOPE_LINK); else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag); + err = fib_validate_source(host, saddr, 0, tos, 0, dev, + &spec_dst, &itag); if (err < 0) goto martian_source; if (err) @@ -2004,6 +2011,10 @@ brd_input: res.type = RTN_BROADCAST; RT_CACHE_STAT_INC(in_brd); + /* FIXME how do I handle broadcast traffic through loopback interfaces? 
*/ + if (!host) + host = &init_host; + local_input: rth = dst_alloc(&ipv4_dst_ops); if (!rth) @@ -2028,7 +2039,7 @@ local_input: #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &host->loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -2041,13 +2052,14 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; + rth->rt_host = host; /* FIXME doesn't this need to be null for broadcast/multicast */ hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); goto done; no_route: RT_CACHE_STAT_INC(in_no_route); - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + spec_dst = inet_select_addr(NULL, dev, 0, RT_SCOPE_UNIVERSE); res.type = RTN_UNREACHABLE; goto local_input; @@ -2241,11 +2253,15 @@ static inline int __mkroute_output(struc RT_CACHE_STAT_INC(out_slow_tot); + if (dev_out->type == ARPHRD_LOOPBACK) { + rth->rt_host = container_of(dev_out, struct nethost, loopback_dev); + } if (flags & RTCF_LOCAL) { rth->u.dst.input = ip_local_deliver; rth->rt_spec_dst = fl->fl4_dst; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { + rth->rt_host = NULL; rth->rt_spec_dst = fl->fl4_src; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { @@ -2364,9 +2380,10 @@ static inline int ip_mkroute_output(stru * Major route resolver routine. */ -static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) +static int ip_route_output_slow(struct nethost *host, struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); + struct net_device *loopback_dev = host? 
&host->loopback_dev : &init_host.loopback_dev; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, @@ -2378,7 +2395,7 @@ static int ip_route_output_slow(struct r .fwmark = oldflp->fl4_fwmark #endif } }, - .iif = loopback_dev.ifindex, + .iif = loopback_dev->ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2452,16 +2469,16 @@ static int ip_route_output_slow(struct r if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) { if (!fl.fl4_src) - fl.fl4_src = inet_select_addr(dev_out, 0, + fl.fl4_src = inet_select_addr(host, dev_out, 0, RT_SCOPE_LINK); goto make_route; } if (!fl.fl4_src) { if (MULTICAST(oldflp->fl4_dst)) - fl.fl4_src = inet_select_addr(dev_out, 0, + fl.fl4_src = inet_select_addr(host, dev_out, 0, fl.fl4_scope); else if (!oldflp->fl4_dst) - fl.fl4_src = inet_select_addr(dev_out, 0, + fl.fl4_src = inet_select_addr(host, dev_out, 0, RT_SCOPE_HOST); } } @@ -2472,9 +2489,9 @@ static int ip_route_output_slow(struct r fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = loopback_dev; dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2502,7 +2519,7 @@ static int ip_route_output_slow(struct r */ if (fl.fl4_src == 0) - fl.fl4_src = inet_select_addr(dev_out, 0, + fl.fl4_src = inet_select_addr(host, dev_out, 0, RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; @@ -2515,11 +2532,15 @@ static int ip_route_output_slow(struct r free_res = 1; if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) - fl.fl4_src = fl.fl4_dst; + if (!fl.fl4_src) { + if (!host || (inet_dev_host(FIB_RES_DEV(res)->ifindex, fl.fl4_dst) == host)) + fl.fl4_src = fl.fl4_dst; + else + fl.fl4_src = FIB_RES_PREFSRC(host, res); + } if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; 
if (res.fi) @@ -2538,7 +2559,7 @@ static int ip_route_output_slow(struct r fib_select_default(&fl, &res); if (!fl.fl4_src) - fl.fl4_src = FIB_RES_PREFSRC(res); + fl.fl4_src = FIB_RES_PREFSRC(host, res); if (dev_out) dev_put(dev_out); @@ -2558,7 +2579,7 @@ make_route: out: return err; } -int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) +int __ip_route_output_key(struct nethost *host, struct rtable **rp, const struct flowi *flp) { unsigned hash; struct rtable *rth; @@ -2600,7 +2621,7 @@ int __ip_route_output_key(struct rtable } rcu_read_unlock_bh(); - return ip_route_output_slow(rp, flp); + return ip_route_output_slow(host, rp, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2608,8 +2629,12 @@ EXPORT_SYMBOL_GPL(__ip_route_output_key) int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) { int err; + struct nethost *host = NULL; + + if (sk) + host = sk->sk_host; - if ((err = __ip_route_output_key(rp, flp)) != 0) + if ((err = __ip_route_output_key(host, rp, flp)) != 0) return err; if (flp->proto) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c85819d..a66d58a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -180,7 +180,7 @@ static int __tcp_v4_check_established(st /* And established part... 
*/ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, sk->sk_host)) goto not_unique; } @@ -497,7 +497,7 @@ static inline void do_pmtu_discovery(str * */ -void tcp_v4_err(struct sk_buff *skb, u32 info) +void tcp_v4_err(struct sk_buff *skb, struct nethost *host, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); @@ -515,7 +515,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); + th->source, inet_iif(skb), host); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1092,7 +1092,8 @@ static struct sock *tcp_v4_hnd_req(struc nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, - ntohs(th->dest), inet_iif(skb)); + ntohs(th->dest), inet_iif(skb), + sk->sk_host); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1200,6 +1201,7 @@ int tcp_v4_rcv(struct sk_buff *skb) struct tcphdr *th; struct sock *sk; int ret; + struct rtable *rt; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1234,9 +1236,10 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; TCP_SKB_CB(skb)->sacked = 0; + rt = (struct rtable*)skb->dst; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, ntohs(th->dest), - inet_iif(skb)); + inet_iif(skb), rt->rt_host); if (!sk) goto no_tcp_socket; @@ -1303,7 +1306,8 @@ do_time_wait: struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), - inet_iif(skb)); + inet_iif(skb), + sk->sk_host); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, &tcp_death_row); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e0bd101..caace8a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -176,6 +176,7 @@ gotit: if (inet2->num == snum 
&& sk2 != sk && + sk2->sk_host == sk->sk_host && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || @@ -221,7 +222,8 @@ static void udp_v4_unhash(struct sock *s * harder than this. -DaveM */ static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) + u32 daddr, u16 dport, int dif, + struct nethost *host) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -253,6 +255,8 @@ static struct sock *udp_v4_lookup_longwa continue; score+=2; } + if (host && host != sk->sk_host) + continue; if(score == 9) { result = sk; break; @@ -266,12 +270,13 @@ static struct sock *udp_v4_lookup_longwa } static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) + u32 daddr, u16 dport, int dif, + struct nethost *host) { struct sock *sk; read_lock(&udp_hash_lock); - sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); + sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif, host); if (sk) sock_hold(sk); read_unlock(&udp_hash_lock); @@ -317,7 +322,7 @@ found: * to find the appropriate port. 
*/ -void udp_err(struct sk_buff *skb, u32 info) +void udp_err(struct sk_buff *skb, struct nethost *host, u32 info) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -328,7 +333,8 @@ void udp_err(struct sk_buff *skb, u32 in int harderr; int err; - sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, + skb->dev->ifindex, host); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -1150,7 +1156,8 @@ int udp_rcv(struct sk_buff *skb) if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return udp_v4_mcast_deliver(skb, uh, saddr, daddr); - sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); + sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, + skb->dev->ifindex, rt->rt_host); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b2b60f3..bc54ffe 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -22,7 +23,15 @@ static struct xfrm_type_map xfrm4_type_m static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) { - return __ip_route_output_key((struct rtable**)dst, fl); + /* This function is simply to support dead code remove it */ + BUG(); + /* I can't properly convert this because I can't tell if the caller + * should be able to pass me a host or not. My gut feel is that + * the caller should be able to pass me a host. + * Interestingly enough this feels like a way to export a GPL + * symbol non-gpl as well. 
+ */ + return __ip_route_output_key(NULL, (struct rtable**)dst, fl); } static struct dst_entry * @@ -267,7 +276,7 @@ static void xfrm4_dst_ifdown(struct dst_ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(&init_host.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a970b47..4337fb3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1000,6 +1001,36 @@ int ipv6_chk_addr(struct in6_addr *addr, return ifp != NULL; } +int ipv6_host_match(const struct sock *sk, const int dif, const struct in6_addr *addr) +{ + struct net_device *dev; + struct inet6_dev *idev; + struct inet6_ifaddr *ifp; + int match; + match = 0; + dev = dev_get_by_index(dif); + if (!dev) + goto out; + + read_lock(&addrconf_lock); + if ((idev = __in6_dev_get(dev)) != NULL) { + read_lock_bh(&idev->lock); + for(ifp = idev->addr_list; ifp; ifp=ifp->if_next) { + if (ipv6_addr_equal(&ifp->addr, addr) && + (ifp->host == sk->sk_host)) + { + match = 1; + break; + } + } + read_unlock_bh(&idev->lock); + } + read_unlock(&addrconf_lock); + dev_put(dev); +out: + return match; +} + static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) { @@ -2074,7 +2105,7 @@ static int addrconf_ifdown(struct net_de ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == &init_host.loopback_dev && how == 1) how = 0; rt6_ifdown(dev); @@ -2860,7 +2891,7 @@ static void inet6_ifa_notify(int event, netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); return; } - if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { + if (inet6_fill_ifaddr(skb, ifa, current->tid, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); return; @@ -2996,7 +3027,7 @@ void inet6_ifinfo_notify(int event, stru 
netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); return; } - if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { + if (inet6_fill_ifinfo(skb, idev, current->tid, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); return; @@ -3056,7 +3087,7 @@ static void inet6_prefix_notify(int even netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); return; } - if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { + if (inet6_fill_prefix(skb, idev, pinfo, current->tid, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); return; @@ -3514,13 +3545,13 @@ int __init addrconf_init(void) * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(&init_host.loopback_dev)) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.rt6i_idev = in6_dev_get(&init_host.loopback_dev); register_netdevice_notifier(&ipv6_dev_notf); @@ -3568,7 +3599,7 @@ void __exit addrconf_cleanup(void) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(&init_host.loopback_dev, 2); /* * Check hash table. 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4f8795a..c565cd6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,9 @@ static int inet6_create(struct socket *s rc = -ESOCKTNOSUPPORT; if (!answer) goto out_rcu_unlock; + if ((current->host != &init_host) && + !(answer->flags & INET_PROTOSW_MULTIHOST)) + goto out_rcu_unlock; rc = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; @@ -143,6 +147,7 @@ static int inet6_create(struct socket *s sock_init_data(sock, sk); rc = 0; + sk->sk_host = current->host; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = 1; @@ -485,6 +490,7 @@ struct proto_ops inet6_dgram_ops = { static struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, + .multi_host = 1, .owner = THIS_MODULE, }; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46..4c1d26a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -46,6 +46,10 @@ struct sock *inet6_lookup_listener(struc continue; score++; } +#if 0 + if (!ipv6_host_match(sk, dif, daddr)) + continue; +#endif if (score == 3) { result = sk; break; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f841bde..6376153 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -342,7 +342,7 @@ fl_create(struct in6_flowlabel_req *freq case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: - fl->owner = current->pid; + fl->owner = current->tid; break; case IPV6_FL_S_USER: fl->owner = current->euid; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a1265a3..c9fb268 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,6 +107,8 @@ struct sock *__raw_v6_lookup(struct sock goto found; continue; } + if (!ipv6_host_match(sk, dif, loc_addr)) + continue; goto found; } sk = NULL; diff --git a/net/ipv6/route.c 
b/net/ipv6/route.c index 5d5bbb4..64cd31a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #include @@ -112,7 +113,7 @@ struct rt6_info ip6_null_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = &init_host.loopback_dev, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -160,8 +161,8 @@ static void ip6_dst_ifdown(struct dst_en struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != &init_host.loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&init_host.loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -865,12 +866,12 @@ int ip6_route_add(struct in6_rtmsg *rtms if ((rtmsg->rtmsg_flags&RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. 
*/ - if (dev != &loopback_dev) { + if (dev != &init_host.loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = &init_host.loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1399,13 +1400,13 @@ struct rt6_info *addrconf_dst_alloc(stru if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(&init_host.loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = &init_host.loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -1840,7 +1841,7 @@ void inet6_rt_notify(int event, struct r { struct sk_buff *skb; int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - u32 pid = current->pid; + u32 pid = current->tid; u32 seq = 0; if (req) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bf95193..2053c1d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -188,6 +188,8 @@ static struct sock *udp_v6_lookup(struct continue; score++; } + if (!ipv6_host_match(sk, dif, daddr)) + continue; if(score == 4) { result = sk; break; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cf1d91e..f214ccc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -280,7 +281,7 @@ static void xfrm6_dst_ifdown(struct dst_ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(&init_host.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8e282c2..fb5a223 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ 
-105,6 +106,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + unsigned int nl_multihost; unsigned int groups; struct module *module; int registered; @@ -197,17 +199,27 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct nethost *host, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; + if (pid == 0) + host = NULL; + /* FIXME how do I modify netlink so that when I transmit + * to userspace I always have enough information to + * uniquely identify the process? + * Do I perhaps need to open new kernel sockets when + * I get a new network namespace? + */ + WARN_ON(pid && !host); read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((!host || (sk->sk_host == host)) && + (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -307,7 +319,7 @@ static int netlink_insert(struct sock *s head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((nlk_sk(osk)->pid == pid) && (osk->sk_host == sk->sk_host)) break; len++; } @@ -401,9 +413,14 @@ static int netlink_create(struct socket groups = nl_table[protocol].groups; netlink_unlock_table(); + err = -EPROTONOSUPPORT; + if (!nl_table[protocol].nl_multihost && (current->host != &init_host)) + goto out_module; + if ((err = __netlink_create(sock, protocol) < 0)) goto out_module; + sock->sk->sk_host = current->host; nlk = nlk_sk(sock->sk); nlk->module = module; out: @@ -483,7 +500,7 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) { + if ((osk->sk_host == sk->sk_host) && (nlk_sk(osk)->pid == pid)) { /* Bind collision, search negative pid values. 
*/ pid = rover--; if (rover > -4097) @@ -655,10 +672,11 @@ static void netlink_overrun(struct sock static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; + struct nethost *host = ssk->sk_host; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + sock = netlink_lookup(host, protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -822,6 +840,7 @@ static __inline__ int netlink_broadcast_ struct netlink_broadcast_data { struct sock *exclude_sk; + struct nethost *host; u32 pid; u32 group; int failure; @@ -839,6 +858,9 @@ static inline int do_one_broadcast(struc if (p->exclude_sk == sk) goto out; + + if (p->host && sk->sk_host && p->host != sk->sk_host) + goto out; if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) @@ -889,6 +911,7 @@ int netlink_broadcast(struct sock *ssk, skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.host = ssk->sk_host; info.pid = pid; info.group = group; info.failure = 0; @@ -1249,6 +1272,7 @@ netlink_kernel_create(int unit, unsigned goto out_sock_release; sk = sock->sk; + sk->sk_host = NULL; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; @@ -1278,6 +1302,12 @@ void netlink_set_nonroot(int protocol, u nl_table[protocol].nl_nonroot = flags; } +void netlink_set_multihost(int protocol) +{ + if ((unsigned int)protocol < MAX_LINKS) + nl_table[protocol].nl_multihost = 1; +} + static void netlink_destroy_callback(struct netlink_callback *cb) { if (cb->skb) @@ -1356,7 +1386,7 @@ int netlink_dump_start(struct sock *ssk, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(skb->sk->sk_host, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1394,7 +1424,7 @@ void netlink_ack(struct sk_buff *in_skb, if (!skb) { struct sock *sk; - sk = 
netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_host, in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1592,6 +1622,7 @@ static struct proto_ops netlink_ops = { static struct net_proto_family netlink_family_ops = { .family = PF_NETLINK, .create = netlink_create, + .multi_host = 1, /* FIXME this is currently a lie... */ .owner = THIS_MODULE, /* for consistency 8) */ }; @@ -1666,6 +1697,7 @@ EXPORT_SYMBOL(netlink_kernel_create); EXPORT_SYMBOL(netlink_register_notifier); EXPORT_SYMBOL(netlink_set_err); EXPORT_SYMBOL(netlink_set_nonroot); +EXPORT_SYMBOL(netlink_set_multihost); EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); diff --git a/net/socket.c b/net/socket.c index 3145103..d0a9015 100644 --- a/net/socket.c +++ b/net/socket.c @@ -84,6 +84,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NET_RADIO #include /* Note : will define WIRELESS_EXT */ @@ -1145,6 +1147,12 @@ static int __sock_create(int family, int if (!try_module_get(net_families[family]->owner)) goto out_release; + /* Unless the address family supports multiple hosts on the same + * machine, fail if we are not the primary host. 
+ */ + if ((current->host != &init_host) && !net_families[family]->multi_host) + goto out_module_put; + if ((err = net_families[family]->create(sock, protocol)) < 0) { sock->ops = NULL; goto out_module_put; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9b5c800..b8f67dd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 41feca3..3ccf9e0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -554,6 +554,7 @@ static struct sock * unix_create1(struct if (!sk) goto out; + sk->sk_host = NULL; atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); @@ -2023,6 +2024,7 @@ static struct file_operations unix_seq_f static struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, + .multi_host = 1, .owner = THIS_MODULE, }; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cbb0ba3..76b5cf9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -125,6 +126,8 @@ int xfrm_dst_lookup(struct xfrm_dst **ds struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int err = 0; + /* This function is dead code and should be removed */ + BUG(); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; @@ -1032,8 +1035,8 @@ static int stale_bundle(struct dst_entry void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &init_host.loopback_dev; + dev_hold(&init_host.loopback_dev); dev_put(dev); } } -- 1.0.GIT