Subject: [PATCH] nethost: Refactor so that using nethost is race free. From: Eric W. Biederman Date: 1137668682 -0700 For ipv4 and ipv6 I need tables that point to all of the per network host information. Initially I tried to allocate these tables lazily. That is racy, and it is very hard to find the spots where it matters. This replaces the socket methods with a nethost notifier, so the network host is actively tracked. That removes a lot of races and weird corner cases that I would otherwise have to worry about. I still need to check at the entry points from user space that my state has been allocated. But except for netlink, sysctl, and proc, that is just at socket creation time. ioctl doesn't trigger the weirdness as it uses a socket. It is now guaranteed that you will have a host creation event before any devices are moved to that host. In addition, it is now guaranteed that the loopback device will be the first device registered. A flag .multi_host was added to net_proto_family so I can flag the network protocols that are safe to use on something besides the default host. Previously I tested for a host_init method, but since that was so rarely needed even for the protocols I had converted, and because the rest of the network stack uses notifiers, I changed it. 
--- include/linux/net.h | 5 -- include/linux/nethost.h | 22 +++++++++ include/net/ip6_host.h | 2 - include/net/ip_host.h | 2 - net/core/host.c | 107 ++++++++++++++++++++++++++++++++++++++++++---- net/ipv4/af_inet.c | 86 +++++++++++++++++-------------------- net/ipv4/devinet.c | 21 ++++++--- net/ipv4/fib_frontend.c | 6 +++ net/ipv4/fib_rules.c | 27 +++++++++--- net/ipv4/route.c | 3 + net/ipv6/addrconf.c | 27 +++++++++--- net/ipv6/af_inet6.c | 78 +++++++++++++++++----------------- net/ipv6/ndisc.c | 2 - net/ipv6/route.c | 48 +++++++++------------ net/netlink/af_netlink.c | 14 ++---- net/packet/af_packet.c | 8 --- net/socket.c | 39 ----------------- net/unix/af_unix.c | 14 ++---- 18 files changed, 293 insertions(+), 218 deletions(-) a723a1985c5ae8bc57a2b952d7ea68da50544080 diff --git a/include/linux/net.h b/include/linux/net.h index da9d702..4fff1d1 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -162,9 +162,8 @@ struct proto_ops { struct net_proto_family { int family; - int (*host_init)(struct nethost *host); - void (*host_fini)(struct nethost *host); int (*create)(struct socket *sock, int protocol); + unsigned multi_host : 1; /* These are counters for the number of different methods of each we support */ short authentication; @@ -179,8 +178,6 @@ struct kvec; extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(struct net_proto_family *fam); extern int sock_unregister(int family); -extern int sock_host_init(struct nethost *host); -extern void sock_host_fini(struct nethost *host); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, diff --git a/include/linux/nethost.h b/include/linux/nethost.h index a968712..b0e7d75 100644 --- a/include/linux/nethost.h +++ b/include/linux/nethost.h @@ -12,7 +12,8 @@ struct ip_host; struct ip6_host; struct nethost { - atomic_t count; + atomic_t count; /* To decide when the nethost should go */ + 
atomic_t use_count; /* For reference we destroy on demand */ struct new_utsname utsname; #ifdef CONFIG_NET struct class net_class; @@ -24,6 +25,25 @@ struct nethost { #endif }; + +static inline void hold_host(struct nethost *host) +{ + atomic_inc(&host->use_count); +} + +static inline void release_host(struct nethost *host) +{ + atomic_dec(&host->use_count); +} + +/* nethost notifier chain */ +#define NETHOST_CREATE 0x0001 +#define NETHOST_DESTROY 0x0002 + +struct notifier_block; +extern int register_nethost_notifier(struct notifier_block *nb); +extern int unregister_nethost_notifier(struct notifier_block *nb); + extern struct nethost init_host; extern int loopback_setup(struct net_device *loopback_dev); diff --git a/include/net/ip6_host.h b/include/net/ip6_host.h index d67afc2..0a415d5 100644 --- a/include/net/ip6_host.h +++ b/include/net/ip6_host.h @@ -14,6 +14,4 @@ struct ip6_host struct rt6_info *rt6_dflt_pointer; }; -extern struct ip6_host *ip6_host_get(struct nethost *host); - #endif /* _NET_IP6_HOST_H */ diff --git a/include/net/ip_host.h b/include/net/ip_host.h index 0cc673b..b247a68 100644 --- a/include/net/ip_host.h +++ b/include/net/ip_host.h @@ -20,6 +20,4 @@ struct ip_host struct inet_peer *peer_root; }; -extern struct ip_host *ip_host_get(struct nethost *host); - #endif /* _NET_IP_HOST_H */ diff --git a/net/core/host.c b/net/core/host.c index df5c821..d69b72c 100644 --- a/net/core/host.c +++ b/net/core/host.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #if 1 @@ -14,6 +15,43 @@ extern int netdev_sysfs_init(struct neth extern void netdev_sysfs_fini(struct nethost *host); #endif +/* + * Our notifier list + */ +static struct notifier_block *nethost_chain; + +static void nethost_fini(struct nethost *host) +{ + unsigned long rebroadcast_time, warning_time; + + /* Send the notifier once and hopefully everything went ok. 
*/ + rtnl_lock(); + notifier_call_chain(&nethost_chain, NETHOST_DESTROY, host); + rtnl_unlock(); + + /* If the use count does not drop to 0 wait... */ + rebroadcast_time = warning_time = jiffies; + while (atomic_read(&host->use_count) != 0) { + if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { + rtnl_lock(); + + /* Rebroadcast unregister notification */ + notifier_call_chain(&nethost_chain, NETHOST_DESTROY, host); + + rtnl_unlock(); + } + msleep(250); + if (time_after(jiffies, warning_time + 10 * HZ)) { + printk(KERN_EMERG "nethost_fini: " + "waiting for nethost to become free. Usage " + "count = %d\n", + atomic_read(&host->use_count)); + warning_time = jiffies; + } + } + +} + static void do_put_host(void *arg) { struct nethost *host = arg; @@ -59,9 +97,7 @@ static void do_put_host(void *arg) rtnl_unlock(); /* Cleanup any remaning protocol state */ - rtnl_lock(); - sock_host_fini(host); - rtnl_unlock(); + nethost_fini(host); /* Now free the sysfs class */ netdev_sysfs_fini(host); @@ -88,14 +124,16 @@ static int nethost_setup(struct nethost rtnl_lock(); + err = notifier_call_chain(&nethost_chain, NETHOST_CREATE, host); + if (err & NOTIFY_STOP_MASK) { + err = -EAGAIN; + goto out_undo; + } + lo = &host->loopback_dev; lo->host = host; err = loopback_setup(lo); if (err) - goto out; - - err = sock_host_init(host); - if (err) goto out_undo; err = 0; @@ -104,8 +142,7 @@ out: return err; out_undo: - sock_host_fini(host); - unregister_netdevice(lo); + notifier_call_chain(&nethost_chain, NETHOST_DESTROY, host); goto out; } @@ -146,6 +183,58 @@ out_free_host: goto out; } +/* + * Host change register/unregister. These are not inline or static + * as we export them to the world. + */ + +/** + * register_nethost_notifier - register a network host notifier block + * @nb: notifier + * + * Register a notifier to be called when host events occur. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. 
A negative errno code + * is returned on a failure. + * + * When registered all creation up events are replayed + * to the new notifier to allow device to have a race free + * view of the network device list. + */ + +int register_nethost_notifier(struct notifier_block *nb) +{ + struct net_device *dev; + int err; + rtnl_lock(); + err = notifier_chain_register(&nethost_chain, nb); + if (!err) { + for (dev = dev_base; dev; dev = dev->next) { + struct nethost *host = dev->host; + if (&host->loopback_dev != dev) + continue; + nb->notifier_call(nb, NETHOST_CREATE, host); + } + } + rtnl_unlock(); + return err; +} + +/** + * unregister_nethost_notifier - unregister a network host notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_host_notifier(). The notifier is unlinked from the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_nethost_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&nethost_chain, nb); +} + __init int nethost_init(void) { return nethost_setup(&init_host); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8661c32..4f4d04f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -218,57 +218,44 @@ out: } /* - * Initialize the per host ipv4 state. + * Create or destroy the per host ipv4 state. 
*/ -static int inet_host_init(struct nethost *host) +static int ip_nethost_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!host->ip_host) { - struct ip_host *ihost; + struct nethost *host = ptr; + struct ip_host *ihost = host->ip_host; + int ret = NOTIFY_OK; + + ASSERT_RTNL(); + + switch(event) { + case NETHOST_CREATE: ihost = kzalloc(sizeof(*ihost), GFP_KERNEL); - if (!ihost) - return -ENOMEM; - ihost->host = host; + if (!ihost) { + ret = NOTIFY_BAD; + break; + } host->ip_host = ihost; - - /* - * Set up the IP module - */ + ihost->host = host; ip_host_init(ihost); + break; + case NETHOST_DESTROY: + if (!ihost) + break; - } - return 0; -} - -/* - * Cleanup the per host ipv4 state. - */ - -static void inet_host_fini(struct nethost *host) -{ - struct ip_host *ihost; - - ASSERT_RTNL(); - - ihost = host->ip_host; - host->ip_host = NULL; - - /* - * Cleanup the IP module - */ - ip_host_fini(ihost); + ip_host_fini(ihost); - /* Finally free the ihost structure */ - kfree(ihost); + host->ip_host = NULL; + kfree(ihost); + break; + } + return ret; } -struct ip_host *ip_host_get(struct nethost *host) -{ - struct ip_host *ihost = NULL; - if (inet_host_init(host) == 0) - ihost = host->ip_host; - return ihost; -} +static struct notifier_block ip_nethost_notifier = { + .notifier_call = ip_nethost_event, +}; /* * Create an inet socket. 
@@ -288,7 +275,7 @@ static int inet_create(struct socket *so int err = -ESOCKTNOSUPPORT; /* Ensure I have an inet host structure */ - ihost = ip_host_get(current->host); + ihost = current->host->ip_host; if (!ihost) goto out; @@ -920,11 +907,10 @@ static struct proto_ops inet_sockraw_ops }; static struct net_proto_family inet_family_ops = { - .family = PF_INET, - .host_init = inet_host_init, - .host_fini = inet_host_fini, - .create = inet_create, - .owner = THIS_MODULE, + .family = PF_INET, + .multi_host = 1, + .create = inet_create, + .owner = THIS_MODULE, }; /* Upon startup we insert all the elements in inetsw_array[] into @@ -1260,6 +1246,12 @@ static int __init inet_init(void) inet_register_protosw(q); /* + * Start tracking per host ipv4 state. + */ + + register_nethost_notifier(&ip_nethost_notifier); + + /* * Set the ARP module up */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 28805f5..0b0067a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -409,6 +409,9 @@ static int inet_rtm_deladdr(struct sk_bu ASSERT_RTNL(); + if (!host->ip_host) + goto out; + if ((in_dev = inetdev_by_index(host, ifm->ifa_index)) == NULL) goto out; __in_dev_put(in_dev); @@ -444,6 +447,9 @@ static int inet_rtm_newaddr(struct sk_bu ASSERT_RTNL(); + if (!host->ip_host) + goto out; + if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) goto out; @@ -961,13 +967,12 @@ static int inetdev_event(struct notifier { struct net_device *dev = ptr; struct nethost *host = dev->host; - struct ip_host *ihost; + struct ip_host *ihost = host->ip_host; struct in_device *in_dev; ASSERT_RTNL(); /* Ensure I have an inet host structure */ - ihost = ip_host_get(host); if (!ihost) goto out; @@ -1075,7 +1080,7 @@ rtattr_failure: static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - int idx, ip_idx; + int idx, ip_idx, next_idx; struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; @@ -1083,14 +1088,14 @@ static int inet_dump_ifaddr(struct sk_bu 
s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); - for (dev = dev_base, idx = 0; dev; dev = dev->next) { - int lidx; + for (dev = dev_base, idx = 0; dev; dev = dev->next, idx = next_idx) { + next_idx = idx; if (dev->host != skb->sk->sk_host) continue; - lidx = idx++; - if (lidx < s_idx) + next_idx++; + if (idx < s_idx) continue; - if (lidx > s_idx) + if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0fc4e18..d2db61d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -309,6 +309,8 @@ int inet_rtm_delroute(struct sk_buff *sk struct rtmsg *r = NLMSG_DATA(nlh); struct ip_host *ihost = skb->sk->sk_host->ip_host; + if (!ihost) + return -EINVAL; if (inet_check_attr(r, rta)) return -EINVAL; @@ -325,6 +327,8 @@ int inet_rtm_newroute(struct sk_buff *sk struct rtmsg *r = NLMSG_DATA(nlh); struct ip_host *ihost = skb->sk->sk_host->ip_host; + if (!ihost) + return -EINVAL; if (inet_check_attr(r, rta)) return -EINVAL; @@ -341,6 +345,8 @@ int inet_dump_fib(struct sk_buff *skb, s struct fib_table *tb; struct ip_host *ihost = skb->sk->sk_host->ip_host; + if (!ihost) + return -EINVAL; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return ip_rt_dump(skb, cb); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index bee9bfa..ec3b519 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -76,6 +76,7 @@ struct fib_rule int r_dead; }; +#if 0 static struct fib_rule default_rule = { .r_clntref = ATOMIC_INIT(2), .r_preference = 0x7FFF, @@ -99,6 +100,7 @@ static struct fib_rule local_rule = { }; static struct fib_rule *fib_rules = &local_rule; +#endif static DEFINE_RWLOCK(fib_rules_lock); int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -106,9 +108,13 @@ int inet_rtm_delrule(struct sk_buff *skb struct rtattr **rta = arg; struct rtmsg 
*rtm = NLMSG_DATA(nlh); struct fib_rule *r, **rp; + struct ip_host *ihost = skb->sk->sk_host->ip_host; int err = -ESRCH; - for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { + if (!ihost) + return -EINVAL; + + for (rp=&ihost->fib_rules; (r=*rp) != NULL; rp=&r->r_next) { if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && rtm->rtm_src_len == r->r_src_len && rtm->rtm_dst_len == r->r_dst_len && @@ -165,7 +171,10 @@ int inet_rtm_newrule(struct sk_buff *skb struct rtmsg *rtm = NLMSG_DATA(nlh); struct fib_rule *r, *new_r, **rp; unsigned char table_id; - struct ip_host *ihost = in_host_get(skb->sk->sk_host); + struct ip_host *ihost = skb->sk->sk_host->ip_host; + + if (!ihost) + return -EINVAL; if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || (rtm->rtm_tos & ~IPTOS_TOS_MASK)) @@ -221,11 +230,11 @@ int inet_rtm_newrule(struct sk_buff *skb memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); #endif - rp = &fib_rules; + rp = &ihost->fib_rules; if (!new_r->r_preference) { r = fib_rules; if (r && (r = r->r_next) != NULL) { - rp = &fib_rules->r_next; + rp = &ihost->fib_rules->r_next; if (r->r_preference) new_r->r_preference = r->r_preference - 1; } @@ -289,7 +298,7 @@ int fib_lookup(const struct flowi *flp, struct ip_host *ihost; BUG_ON(!flp->host); - ihost = in_host_get(flp->host); + ihost = flp->host->ip_host; u32 daddr = flp->fl4_dst; u32 saddr = flp->fl4_src; @@ -350,7 +359,7 @@ void fib_select_default(const struct flo if (res->r && res->r->r_action == RTN_UNICAST && FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { struct fib_table *tb; - struct ip_host *ihost = in_host_get(flp->host); + struct ip_host *ihost = flp->host->ip_host; if ((tb = fib_get_table(ihost, res->r->r_table)) != NULL) tb->tb_select_default(tb, flp, res); } @@ -422,13 +431,17 @@ rtattr_failure: int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { + struct ip_host *ihost = skb->sk->sk_host->ip_host; int idx; int s_idx = 
cb->args[0]; struct fib_rule *r; + if (!ihost) + return -EINVAL; + read_lock(&fib_rules_lock); for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { - if (idx < s_idx) + if (idx++ < s_idx) continue; if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c5a659a..fa18233 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1,3 +1,4 @@ + /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -2763,6 +2764,8 @@ int inet_rtm_getroute(struct sk_buff *in struct sk_buff *skb; struct nethost *host = in_skb->sk->sk_host; + if (!host->ip_host) + return -EINVAL; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 08507e3..8843b1a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1993,7 +1993,7 @@ static int addrconf_notify(struct notifi { struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); - struct ip6_host *ihost = ip6_host_get(dev->host); + struct ip6_host *ihost = dev->host->ip6_host; /* If we don't have a support for this interface forget it */ if (unlikely(!ihost)) @@ -2608,6 +2608,10 @@ inet6_rtm_deladdr(struct sk_buff *skb, s struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + struct nethost *host = skb->sk->sk_host; + + if (!host->ip6_host) + return -EINVAL; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2623,7 +2627,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, s if (pfx == NULL) return -EINVAL; - return inet6_addr_del(skb->sk->sk_host, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + return inet6_addr_del(host, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } static int @@ -2632,7 +2636,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, s struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + struct nethost *host = skb->sk->sk_host; + if 
(!host->ip6_host) + return -EINVAL; pfx = NULL; if (rta[IFA_ADDRESS-1]) { if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) @@ -2647,7 +2654,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, s if (pfx == NULL) return -EINVAL; - return inet6_addr_add(skb->sk->sk_host, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + return inet6_addr_add(host, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, @@ -2781,7 +2788,7 @@ enum addr_type_t static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { - int idx, ip_idx; + int idx, ip_idx, next_idx; int s_idx, s_ip_idx; int err = 1; struct net_device *dev; @@ -2794,11 +2801,15 @@ static int inet6_dump_addr(struct sk_buf s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; host = skb->sk->sk_host; + if (!host->ip6_host) + return -EINVAL; read_lock(&dev_base_lock); - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + for (dev = dev_base, idx = 0; dev; dev = dev->next, idx = next_idx) { + next_idx = idx; if (dev->host != host) continue; + next_idx++; if (idx < s_idx) continue; if (idx > s_idx) @@ -3001,8 +3012,10 @@ static int inet6_dump_ifinfo(struct sk_b struct inet6_dev *idev; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { - if (idx < s_idx) + for (dev=dev_base, idx=0; dev; dev = dev->next) { + if (dev->host != skb->sk->sk_host) + continue; + if (idx++ < s_idx) continue; if ((idev = in6_dev_get(dev)) == NULL) continue; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bb90513..fe0cec1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -85,48 +85,44 @@ static __inline__ struct ipv6_pinfo *ine } /* - * Initialize the per host ipv4 state. + * Create or destroy the per host ipv6 state. 
*/ -static int inet6_host_init(struct nethost *host) +static int ip6_nethost_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!host->ip6_host) { - struct ip6_host *ihost; + struct nethost *host = ptr; + struct ip6_host *ihost = host->ip6_host; + int ret = NOTIFY_OK; + + ASSERT_RTNL(); + + switch(event) { + case NETHOST_CREATE: ihost = kzalloc(sizeof(*ihost), GFP_KERNEL); - if (!ihost) - return -ENOMEM; - ihost->host = host; + if (!ihost) { + ret = NOTIFY_BAD; + break; + } host->ip6_host = ihost; + ihost->host = host; ip6_route_host_init(ihost); - } - return 0; -} - -/* - * Cleanup the per host ipv4 state. - */ - -static void inet6_host_fini(struct nethost *host) -{ - struct ip6_host *ihost; - - ASSERT_RTNL(); + break; + case NETHOST_DESTROY: + if (!ihost) + break; - ihost = host->ip6_host; - host->ip6_host = NULL; + ip6_route_host_cleanup(ihost); - ip6_route_host_cleanup(ihost); - - kfree(ihost); + host->ip6_host = NULL; + kfree(ihost); + break; + } + return ret; } -struct ip6_host *ip6_host_get(struct nethost *host) -{ - struct ip6_host *ihost = NULL; - if (inet6_host_init(host) == 0) - ihost = host->ip6_host; - return ihost; -} +static struct notifier_block ip6_nethost_notifier = { + .notifier_call = ip6_nethost_event, +}; static int inet6_create(struct socket *sock, int protocol) { @@ -142,7 +138,7 @@ static int inet6_create(struct socket *s int rc; rc = -ESOCKTNOSUPPORT; - ihost = ip6_host_get(current->host); + ihost = current->host->ip6_host; if (!ihost) goto out; @@ -470,6 +466,8 @@ int inet6_ioctl(struct socket *sock, uns struct sock *sk = sock->sk; int err = -EINVAL; + if (!current->host->ip6_host) + return -EINVAL; switch(cmd) { case SIOCGSTAMP: @@ -539,11 +537,10 @@ struct proto_ops inet6_dgram_ops = { }; static struct net_proto_family inet6_family_ops = { - .family = PF_INET6, - .host_init = inet6_host_init, - .host_fini = inet6_host_fini, - .create = inet6_create, - .owner = THIS_MODULE, + .family = PF_INET6, + .multi_host = 1, 
+ .create = inet6_create, + .owner = THIS_MODULE, }; /* Same as inet6_dgram_ops, sans udp_poll. */ @@ -759,6 +756,11 @@ static int __init inet6_init(void) */ (void) sock_register(&inet6_family_ops); + /* + * Start tracking per host ipv6 state. + */ + register_nethost_notifier(&ip6_nethost_notifier); + /* Initialise ipv6 mibs */ err = init_ipv6_mibs(); if (err) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c8a04c9..a8dfa6c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1510,7 +1510,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct ip6_host *ihost = ip6_host_get(dev->host); + struct ip6_host *ihost = dev->host->ip6_host; switch (event) { case NETDEV_CHANGEADDR: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f248949..f78cb36 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,14 +341,10 @@ struct rt6_info *rt6_lookup(struct netho struct in6_addr *daddr, struct in6_addr *saddr, int oif, int strict) { - struct ip6_host *ihost; + struct ip6_host *ihost = host->ip6_host; struct fib6_node *fn; struct rt6_info *rt; - ihost = ip6_host_get(host); - if (!ihost) - return NULL; - read_lock_bh(&rt6_lock); fn = fib6_lookup(&ihost->ip6_routing_table, daddr, saddr); rt = rt6_device_match(ihost, fn->leaf, oif, strict); @@ -372,12 +368,9 @@ struct rt6_info *rt6_lookup(struct netho int ip6_ins_rt(struct nethost *host, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct sk_buff *in_skb) { - struct ip6_host *ihost; + struct ip6_host *ihost = host->ip6_host; int err; - ihost = ip6_host_get(host); - if (!ihost) - return -ENOENT; write_lock_bh(&rt6_lock); err = fib6_add(&ihost->ip6_routing_table, rt, nlh, _rtattr, in_skb); write_unlock_bh(&rt6_lock); @@ -450,16 +443,12 @@ if (rt == &ihost->ip6_null_entry && stri void ip6_route_input(struct sk_buff *skb) { - struct ip6_host *ihost; + struct ip6_host *ihost = 
skb->dev->host->ip6_host; struct fib6_node *fn; struct rt6_info *rt; int strict; int attempts = 3; - ihost = ip6_host_get(skb->dev->host); - if (!ihost) - return; - strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); relookup: @@ -788,7 +777,6 @@ int ip6_route_add(struct nethost *host, struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct sk_buff *in_skb) { - struct ip6_host *ihost; int err; struct rtmsg *r; struct rtattr **rta; @@ -797,10 +785,6 @@ int ip6_route_add(struct nethost *host, struct inet6_dev *idev = NULL; int addr_type; - ihost = ip6_host_get(host); - if (!ihost) - return -ENODEV; - rta = (struct rtattr **) _rtattr; if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) @@ -1575,20 +1559,26 @@ int inet6_rtm_delroute(struct sk_buff *s { struct rtmsg *r = NLMSG_DATA(nlh); struct in6_rtmsg rtmsg; + struct nethost *host = skb->sk->sk_host; + if (!host->ip6_host) + return -EINVAL; if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(skb->sk->sk_host, &rtmsg, nlh, arg, skb); + return ip6_route_del(host, &rtmsg, nlh, arg, skb); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct rtmsg *r = NLMSG_DATA(nlh); struct in6_rtmsg rtmsg; + struct nethost *host = skb->sk->sk_host; + if (!host->ip6_host) + return -EINVAL; if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(skb->sk->sk_host, &rtmsg, nlh, arg, skb); + return ip6_route_add(host, &rtmsg, nlh, arg, skb); } struct rt6_rtnl_dump_arg @@ -1745,14 +1735,13 @@ static int fib6_dump_done(struct netlink int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct ip6_host *ihost; + struct ip6_host *ihost = skb->sk->sk_host->ip6_host; struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; int res; - ihost = ip6_host_get(skb->sk->sk_host); if (!ihost) - return 0; + return -EINVAL; arg.skb = skb; arg.cb = cb; @@ -1806,12 +1795,16 @@ int 
inet6_rtm_getroute(struct sk_buff *i { struct rtattr **rta = arg; int iif = 0; - int err = -ENOBUFS; + int err; struct sk_buff *skb; struct flowi fl; struct rt6_info *rt; struct nethost *host = in_skb->sk->sk_host; + err = -EINVAL; + if (!host->ip6_host) + goto out; + err = -ENOBUFS; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto out; @@ -2039,8 +2032,9 @@ int ipv6_sysctl_rtcache_flush(ctl_table void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { - struct ip6_host *ihost; - ihost = ip6_host_get(current->host); + struct ip6_host *ihost = current->host->ip6_host; + if (!ihost) + return 0; proc_dointvec(ctl, write, filp, buffer, lenp, ppos); fib6_run_gc(ihost, flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cead1e6..f769665 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -451,12 +451,6 @@ out: return err; } -static int netlink_host_init(struct nethost *host) -{ - /* We don't need any per host state */ - return 0; -} - static int netlink_create(struct socket *sock, int protocol) { struct module *module = NULL; @@ -1712,10 +1706,10 @@ static struct proto_ops netlink_ops = { }; static struct net_proto_family netlink_family_ops = { - .family = PF_NETLINK, - .host_init = netlink_host_init, - .create = netlink_create, - .owner = THIS_MODULE, /* for consistency 8) */ + .family = PF_NETLINK, + .multi_host = 1, + .create = netlink_create, + .owner = THIS_MODULE, /* for consistency 8) */ }; extern void netlink_skb_parms_too_large(void); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da15c93..47a098e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -993,12 +993,6 @@ static struct proto packet_proto = { .obj_size = sizeof(struct packet_sock), }; -static int packet_host_init(struct nethost *host) -{ - /* We don't need any per host state */ - return 0; -} - /* * Create a packet of type SOCK_PACKET. 
*/ @@ -1837,7 +1831,7 @@ static struct proto_ops packet_ops = { static struct net_proto_family packet_family_ops = { .family = PF_PACKET, - .host_init = packet_host_init, + .multi_host = 1, .create = packet_create, .owner = THIS_MODULE, }; diff --git a/net/socket.c b/net/socket.c index c050b8a..38050f8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1150,7 +1150,7 @@ static int __sock_create(int family, int /* Unless the address family supports multiple hosts on the same * machine. Fail if we are not the primary host. */ - if ((current->host != &init_host) && !net_families[family]->host_init) + if ((current->host != &init_host) && !net_families[family]->multi_host) goto out_module_put; if ((err = net_families[family]->create(sock, protocol)) < 0) { @@ -1999,43 +1999,6 @@ asmlinkage long sys_socketcall(int call, #endif /* __ARCH_WANT_SYS_SOCKETCALL */ -int sock_host_init(struct nethost *host) -{ - int err = 0, i; - - net_family_read_lock(); - for(i = 0; !err && (i < NPROTO); i++) { - struct net_proto_family *family; - int (*init)(struct nethost *host); - family = net_families[i]; - if (family) { - init = family->host_init; - if (init) - err = init(host); - } - } - net_family_read_unlock(); - return err; -} - -void sock_host_fini(struct nethost *host) -{ - int i; - - net_family_read_lock(); - for(i = 0; i < NPROTO; i++) { - struct net_proto_family *family; - void (*fini)(struct nethost *host); - family = net_families[i]; - if (family) { - fini = family->host_fini; - if (fini) - fini(host); - } - } - net_family_read_unlock(); -} - /* * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 41c5b67..d37e778 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -545,12 +545,6 @@ static struct proto unix_proto = { .obj_size = sizeof(struct unix_sock), }; -static int unix_host_init(struct nethost *host) -{ - /* We don't need any per host state 
*/ - return 0; -} - static struct sock * unix_create1(struct socket *sock) { struct sock *sk = NULL; @@ -2047,10 +2041,10 @@ static struct file_operations unix_seq_f #endif static struct net_proto_family unix_family_ops = { - .family = PF_UNIX, - .host_init = unix_host_init, - .create = unix_create, - .owner = THIS_MODULE, + .family = PF_UNIX, + .multi_host = 1, + .create = unix_create, + .owner = THIS_MODULE, }; static int __init af_unix_init(void) -- 1.0.GIT