Subject: [PATCH] nethost ipv4: Update state allocation so it works by design From: Eric W. Biederman Date: 1136921094 -0700 This completes my audit of the initialization and cleanup paths for the ipv4 stack. Still to do is looking at all of the ioctls and netlink calls so I can create struct ip_host lazily. This patch also fixes time-wait sockets to have a struct nethost * member so they can be attributed to the proper network host. --- include/net/inet6_hashtables.h | 3 ++ include/net/inet_hashtables.h | 12 ++++++---- include/net/inet_timewait_sock.h | 3 ++ include/net/inetpeer.h | 7 +++++- include/net/ip.h | 3 ++ include/net/ip_fib.h | 13 ++++++---- include/net/ip_host.h | 8 ++---- include/net/route.h | 2 ++ include/net/sock.h | 5 ++-- net/ipv4/af_inet.c | 36 ++++++++++++++++++++++------- net/ipv4/devinet.c | 13 +++------- net/ipv4/fib_frontend.c | 48 ++++++++++++++++++++++++++------------ net/ipv4/fib_hash.c | 14 +++++++---- net/ipv4/fib_trie.c | 37 +++++++++++++++++------------ net/ipv4/inet_timewait_sock.c | 2 ++ net/ipv4/inetpeer.c | 32 ++++++++++++++++++------- net/ipv4/ip_output.c | 11 +++++++++ net/ipv4/route.c | 12 +++++++++- net/ipv4/tcp_ipv4.c | 9 ++++--- net/ipv6/tcp_ipv6.c | 1 + 20 files changed, 186 insertions(+), 85 deletions(-) 6b370407ad413dd7e5390adff14beb334403d2d1 diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index c912a44..991faa0 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -84,7 +84,8 @@ static inline struct sock * const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && - sk->sk_family == PF_INET6) { + sk->sk_family == PF_INET6 && + sk->sk_host == host) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index d27f9f6..361d382 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ 
-353,11 +353,12 @@ sherry_cache: ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ ((__host) == (__sk)->sk_host)) -#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __host)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ + ((__host) == (__sk)->sk_host)) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __host)\ @@ -367,12 +368,13 @@ sherry_cache: ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ ((__host) == (__sk)->sk_host)) -#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __host)\ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ + ((__host) == (__sk)->sk_host)) #endif /* 64-bit arch */ /* @@ -406,7 +408,7 @@ static inline struct sock * /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, host)) goto hit; } sk = NULL; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4ade56e..4d44721 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -114,6 +115,7 @@ struct inet_timewait_sock { #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_host __tw_common.skc_host volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; @@ -197,6 +199,7 @@ static inline void inet_twsk_put(struct printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif + put_host(tw->tw_host); kmem_cache_free(tw->tw_prot->twsk_slab, tw); } } diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 7fda471..ff6adea 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,6 +15,9 @@ #include #include +struct nethost; +struct ip_host; + struct inet_peer { struct inet_peer *avl_left, *avl_right; @@ -22,6 +25,7 @@ struct inet_peer unsigned long dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; + struct nethost *host; __u32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ @@ -30,9 +34,10 @@ struct inet_peer }; void inet_initpeers(void) __init; +void inet_peers_host_init(struct ip_host *ihost); /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(__u32 daddr, int create); +struct inet_peer *inet_getpeer(struct nethost *host, __u32 daddr, int create); extern spinlock_t inet_peer_unused_lock; extern struct inet_peer **inet_peer_unused_tailp; diff --git a/include/net/ip.h 
b/include/net/ip.h index eba0ee8..decb127 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -35,6 +35,7 @@ #include struct sock; +struct ip_host; struct inet_skb_parm { @@ -96,6 +97,8 @@ extern int ip_do_nat(struct sk_buff *sk extern void ip_send_check(struct iphdr *ip); extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok); extern void ip_init(void); +extern void ip_host_init(struct ip_host *ihost); +extern void ip_host_fini(struct ip_host *ihost); extern int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f525b63..c647f89 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -185,17 +185,17 @@ static inline struct fib_table *fib_new_ static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) { - struct ip_host *ihost = in_host_get(flp->fl.host); - if (ihost->ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) && - ihost->ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res)) + struct ip_host *ihost = flp->host->ip_host; + if (ihost->ip_fib_local_table->tb_lookup(ihost->ip_fib_local_table, flp, res) && + ihost->ip_fib_main_table->tb_lookup(ihost->ip_fib_main_table, flp, res)) return -ENETUNREACH; return 0; } static inline void fib_select_default(const struct flowi *flp, struct fib_result *res) { - struct ip_host *ihost = in_host_get(flp->fl.host); - struct fib_table *main_table = ihost->ip_fib_table_main_table; + struct ip_host *ihost = flp->host->ip_host; + struct fib_table *main_table = ihost->ip_fib_main_table; if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) main_table->tb_select_default(main_table, flp, res); } @@ -228,6 +228,8 @@ extern void fib_select_default(const str /* Exported by fib_frontend.c */ extern void ip_fib_init(void); +extern void ip_fib_host_init(struct ip_host *ihost); +extern void ip_fib_host_fini(struct ip_host *ihost); extern int 
inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); @@ -247,6 +249,7 @@ extern u32 __fib_res_prefsrc(struct fib /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(int id); +extern void fib_hash_fini(struct fib_table *tb); #ifdef CONFIG_IP_MULTIPLE_TABLES /* Exported by fib_rules.c */ diff --git a/include/net/ip_host.h b/include/net/ip_host.h index 4b3c2ce..67f7517 100644 --- a/include/net/ip_host.h +++ b/include/net/ip_host.h @@ -4,6 +4,7 @@ #include struct fib_table; +struct inet_peer; struct ip_host { @@ -16,14 +17,11 @@ struct ip_host #define ip_fib_local_table fib_tables[RT_TABLE_LOCAL] #define ip_fib_main_table fib_tables[RT_TABLE_MAIN] #endif /* CONFIG_IP_MULTIPLE_TABLES */ + struct inet_peer *peer_root; }; -static inline struct ip_host *in_host_get(struct nethost *host) -{ - return host->ip_host; -} - extern int inet_host_init(struct nethost *host); extern void inet_host_fini(struct nethost *host); +extern struct ip_host *ip_host_get(struct nethost *host); #endif /* _NET_IP_HOST_H */ diff --git a/include/net/route.h b/include/net/route.h index 871b683..73dd40f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -110,6 +110,8 @@ extern struct ip_rt_acct *ip_rt_acct; struct in_device; extern int ip_rt_init(void); +extern void ip_rt_host_init(struct ip_host *ihost); +extern void ip_rt_host_fini(struct ip_host *ihost); extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, u32 src, u8 tos, struct net_device *dev); extern void ip_rt_advice(struct rtable **rp, int advice); diff --git a/include/net/sock.h b/include/net/sock.h index 2c9136e..91ca62f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -89,6 +89,7 @@ do { spin_lock_init(&((__sk)->sk_lock.sl struct sock; struct proto; +struct nethost; /** * struct sock_common - minimal network 
layer representation of sockets @@ -115,9 +116,9 @@ struct sock_common { atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + struct nethost *skc_host; }; -struct nethost; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -190,6 +191,7 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_host __sk_common.skc_host unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -244,7 +246,6 @@ struct sock { struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; int sk_write_pending; - struct nethost *sk_host; void *sk_security; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index af952b5..d73e404 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -222,8 +222,6 @@ out: */ int inet_host_init(struct nethost *host) { - /* FIXME flesh me out */ - /* FIXME init_host_init calling race */ if (!host->ip_host) { struct ip_host *ihost; ihost = kzalloc(sizeof(*ihost), GFP_KERNEL); @@ -231,6 +229,13 @@ int inet_host_init(struct nethost *host) return -ENOMEM; ihost->host = host; host->ip_host = ihost; + + /* + * Set up the IP module + */ + + ip_host_init(ihost); + } return 0; } @@ -244,15 +249,31 @@ void inet_host_fini(struct nethost *host struct ip_host *ihost; ihost = host->ip_host; host->ip_host = NULL; + + /* + * Cleanup the IP module + */ + ip_host_fini(ihost); + + /* Finally free the ihost structure */ kfree(ihost); } +struct ip_host *ip_host_get(struct nethost *host) +{ + struct ip_host *ihost = NULL; + if (inet_host_init(host) == 0) + ihost = host->ip_host; + return ihost; +} + /* * Create an inet socket. 
*/ static int inet_create(struct socket *sock, int protocol) { + struct ip_host *ihost; struct sock *sk; struct list_head *p; struct inet_protosw *answer; @@ -263,6 +284,11 @@ static int inet_create(struct socket *so int try_loading_module = 0; int err = -ESOCKTNOSUPPORT; + /* Ensure I have an inet host structure */ + ihost = ip_host_get(current->host); + if (!ihost) + goto out; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -1190,12 +1216,6 @@ static int __init inet_init(void) goto out; } -#if 0 - rc = inet_host_init(current->host); - if (rc) - goto out; -#endif - rc = proto_register(&tcp_prot, 1); if (rc) goto out; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 90e38b3..28805f5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -966,15 +966,10 @@ static int inetdev_event(struct notifier ASSERT_RTNL(); - ihost = in_host_get(host); - if (!ihost) { - inet_host_init(host); - ihost = in_host_get(host); - if (!ihost) - panic("devinet: Failed to create ihost\n"); - if (!ihost) - goto out; - } + /* Ensure I have an inet host structure */ + ihost = ip_host_get(host); + if (!ihost) + goto out; in_dev = __in_dev_get_rtnl(dev); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1caf7ba..0fc4e18 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -83,8 +83,8 @@ static void fib_flush(struct ip_host *ih flushed += tb->tb_flush(tb); } #else /* CONFIG_IP_MULTIPLE_TABLES */ - flushed += ihost->ip_fib_main_table->tb_flush(ip_fib_main_table); - flushed += ihost->ip_fib_local_table->tb_flush(ip_fib_local_table); + flushed += ihost->ip_fib_main_table->tb_flush(ihost->ip_fib_main_table); + flushed += ihost->ip_fib_local_table->tb_flush(ihost->ip_fib_local_table); #endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) @@ -102,7 +102,7 @@ struct net_device * ip_dev_find(struct n struct fib_result res; struct net_device *dev = NULL; struct fib_table *local_table; - struct ip_host 
*ihost = in_host_get(host); + struct ip_host *ihost = host->ip_host; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; @@ -139,7 +139,7 @@ unsigned inet_addr_type(struct nethost * res.r = NULL; #endif - local_table = in_host_get(host)->ip_fib_local_table; + local_table = host->ip_host->ip_fib_local_table; if (local_table) { ret = RTN_UNICAST; if (!local_table->tb_lookup(local_table, &fl, &res)) { @@ -238,7 +238,7 @@ e_inval: int ip_rt_ioctl(unsigned int cmd, void __user *arg) { - struct ip_host *ihost = in_host_get(current->host); + struct ip_host *ihost = current->host->ip_host; int err; struct kern_rta rta; struct rtentry r; @@ -307,7 +307,7 @@ int inet_rtm_delroute(struct sk_buff *sk struct fib_table * tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); - struct ip_host *ihost = in_host_get(skb->sk->sk_host); + struct ip_host *ihost = skb->sk->sk_host->ip_host; if (inet_check_attr(r, rta)) return -EINVAL; @@ -323,7 +323,7 @@ int inet_rtm_newroute(struct sk_buff *sk struct fib_table * tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); - struct ip_host *ihost = in_host_get(skb->sk->sk_host); + struct ip_host *ihost = skb->sk->sk_host->ip_host; if (inet_check_attr(r, rta)) return -EINVAL; @@ -339,7 +339,7 @@ int inet_dump_fib(struct sk_buff *skb, s int t; int s_t; struct fib_table *tb; - struct ip_host *ihost = in_host_get(skb->sk->sk_host); + struct ip_host *ihost = skb->sk->sk_host->ip_host; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) @@ -379,7 +379,7 @@ static void fib_magic(int cmd, int type, struct rtmsg rtm; } req; struct kern_rta rta; - struct ip_host *ihost = in_host_get(ifa->ifa_dev->dev->host); + struct ip_host *ihost = ifa->ifa_dev->dev->host->ip_host; memset(&req.rtm, 0, sizeof(req.rtm)); memset(&rta, 0, sizeof(rta)); @@ -460,7 +460,7 @@ static void fib_del_ifaddr(struct in_ifa struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa; struct nethost *host = 
dev->host; - struct ip_host *ihost = in_host_get(host); + struct ip_host *ihost = host->ip_host; u32 brd = ifa->ifa_address|~ifa->ifa_mask; u32 any = ifa->ifa_address&ifa->ifa_mask; #define LOCAL_OK 1 @@ -558,7 +558,7 @@ static void nl_fib_input(struct sock *sk int err; u32 pid; struct fib_table *tb; - struct ip_host *ihost = in_host_get(sk->sk_host); + struct ip_host *ihost = sk->sk_host->ip_host; skb = skb_recv_datagram(sk, 0, 0, &err); nlh = (struct nlmsghdr *)skb->data; @@ -582,7 +582,7 @@ static void nl_fib_lookup_init(void) static void fib_disable_ip(struct net_device *dev, int force) { - struct ip_host *ihost = in_host_get(dev->host); + struct ip_host *ihost = dev->host->ip_host; if (fib_sync_down(0, dev, force)) fib_flush(ihost); rt_cache_flush(0); @@ -658,12 +658,30 @@ static struct notifier_block fib_netdev_ .notifier_call =fib_netdev_event, }; -void __init ip_fib_init(void) +void ip_fib_host_init(struct ip_host *ihost) +{ +#ifndef CONFIG_IP_MULTIPLE_TABLES + ihost->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + ihost->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); +#endif +} + +void ip_fib_host_fini(struct ip_host *ihost) { #ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + fib_hash_fini(ihost->ip_fib_local_table); + fib_hash_fini(ihost->ip_fib_main_table); #else + int id; + for(id = 0; id <= RT_TABLE_MAX; id++) { + fib_hash_fini(ihost->fib_tables[id]); + } +#endif +} + +void __init ip_fib_init(void) +{ +#ifdef CONFIG_IP_MULTIPLE_TABLES fib_rules_init(); #endif diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index e42f412..0a1526f 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -766,11 +766,15 @@ static int fn_hash_dump(struct fib_table return skb->len; } -#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_hash_fini(struct fib_table *tb) +{ + if (!tb) + return; + fn_hash_flush(tb); + kfree(tb); +} + struct fib_table * fib_hash_init(int id) 
-#else -struct fib_table * __init fib_hash_init(int id) -#endif { struct fib_table *tb; @@ -1062,7 +1066,7 @@ static int fib_seq_open(struct inode *in seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); - s->ihost = in_host_get(current->host); + s->ihost = current->host->ip_host; out: return rc; out_kfree: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c33fb8..0311e23 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -166,7 +166,6 @@ static struct tnode *halve(struct trie * static void tnode_free(struct tnode *tn); static kmem_cache_t *fn_alias_kmem __read_mostly; -static struct trie *trie_local = NULL, *trie_main = NULL; /* rcu_read_lock needs to be hold by caller from readside */ @@ -1934,13 +1933,17 @@ out: return -1; } +void fib_hash_fini(struct fib_table *tb) +{ + if (!tb) + return; + fn_trie_flush(tb); + kfree(tb); +} + /* Fix more generic FIB names for init later */ -#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table * fib_hash_init(int id) -#else -struct fib_table * __init fib_hash_init(int id) -#endif { struct fib_table *tb; struct trie *t; @@ -1970,11 +1973,6 @@ struct fib_table * __init fib_hash_init( trie_init(t); if (id == RT_TABLE_LOCAL) - trie_local = t; - else if (id == RT_TABLE_MAIN) - trie_main = t; - - if (id == RT_TABLE_LOCAL) printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; @@ -1983,6 +1981,7 @@ struct fib_table * __init fib_hash_init( #ifdef CONFIG_PROC_FS /* Depth first Trie walk iterator */ struct fib_trie_iter { + struct trie *trie_local, *trie_main; struct tnode *tnode; struct trie *trie; unsigned index; @@ -2128,8 +2127,12 @@ static void trie_show_stats(struct seq_f static int fib_triestat_seq_show(struct seq_file *seq, void *v) { + struct trie *trie_local, *trie_main; struct trie_stat *stat; + trie_local = (struct trie *)(current->host->ip_host->ip_fib_local_table->tb_data); + trie_main = (struct trie *)(current->host->ip_host->ip_fib_main_table->tb_data); + stat = 
kmalloc(sizeof(*stat), GFP_KERNEL); if (!stat) return -ENOMEM; @@ -2172,13 +2175,13 @@ static struct node *fib_trie_get_idx(str loff_t idx = 0; struct node *n; - for (n = fib_trie_get_first(iter, trie_local); + for (n = fib_trie_get_first(iter, iter->trie_local); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; } - for (n = fib_trie_get_first(iter, trie_main); + for (n = fib_trie_get_first(iter, iter->trie_main); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; @@ -2209,8 +2212,8 @@ static void *fib_trie_seq_next(struct se return v; /* continue scan in next trie */ - if (iter->trie == trie_local) - return fib_trie_get_first(iter, trie_main); + if (iter->trie == iter->trie_local) + return fib_trie_get_first(iter, iter->trie_main); return NULL; } @@ -2280,7 +2283,7 @@ static int fib_trie_seq_show(struct seq_ t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); if (!NODE_PARENT(n)) { - if (iter->trie == trie_local) + if (iter->trie == iter->trie_local) seq_puts(seq, ":\n"); else seq_puts(seq, "
:\n"); @@ -2333,6 +2336,8 @@ static int fib_trie_seq_open(struct inod if (!s) goto out; + s->trie_local = (struct trie *)(current->host->ip_host->ip_fib_local_table->tb_data); + s->trie_main = (struct trie *)(current->host->ip_host->ip_fib_main_table->tb_data); rc = seq_open(file, &fib_trie_seq_ops); if (rc) @@ -2451,6 +2456,8 @@ static int fib_route_seq_open(struct ino if (!s) goto out; + s->trie_local = (struct trie *)(current->host->ip_host->ip_fib_local_table->tb_data); + s->trie_main = (struct trie *)(current->host->ip_host->ip_fib_main_table->tb_data); rc = seq_open(file, &fib_route_seq_ops); if (rc) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f9076ef..4343e52 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -109,6 +109,8 @@ struct inet_timewait_sock *inet_twsk_all tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; + tw->tw_host = sk->sk_host; + get_host(tw->tw_host); atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd3..4b37af2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Theory of operations. 
@@ -82,7 +83,6 @@ static struct inet_peer peer_fake_node = .avl_height = 0 }; #define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; static DEFINE_RWLOCK(peer_pool_lock); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ @@ -106,6 +106,13 @@ static DEFINE_TIMER(peer_periodic_timer, int inet_peer_gc_mintime = 10 * HZ, inet_peer_gc_maxtime = 120 * HZ; + +/* Called from ip_output.c:ip_host_init */ +void inet_peers_host_init(struct ip_host *ihost) +{ + ihost->peer_root = peer_avl_empty; +} + /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -158,12 +165,13 @@ static void unlink_from_unused(struct in } /* Called with local BH disabled and the pool lock held. */ -#define lookup(daddr) \ +#define lookup(host, daddr) \ ({ \ struct inet_peer *u, **v; \ + struct ip_host *ihost = host->ip_host; \ stackptr = stack; \ - *stackptr++ = &peer_root; \ - for (u = peer_root; u != peer_avl_empty; ) { \ + *stackptr++ = &ihost->peer_root; \ + for (u = ihost->peer_root; u != peer_avl_empty; ) { \ if (daddr == u->v4daddr) \ break; \ if (daddr < u->v4daddr) \ @@ -294,7 +302,7 @@ static void unlink_from_pool(struct inet if (atomic_read(&p->refcnt) == 1) { struct inet_peer **stack[PEER_MAXDEPTH]; struct inet_peer ***stackptr, ***delp; - if (lookup(p->v4daddr) != p) + if (lookup(p->host, p->v4daddr) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ if (p->avl_left == peer_avl_empty) { @@ -324,8 +332,10 @@ static void unlink_from_pool(struct inet } write_unlock_bh(&peer_pool_lock); - if (do_free) + if (do_free) { + put_host(p->host); kmem_cache_free(peer_cachep, p); + } else /* The node is used again. Decrease the reference counter * back. The loop "cleanup -> unlink_from_unused @@ -373,14 +383,14 @@ static int cleanup_once(unsigned long tt } /* Called with or without local BH being disabled. 
*/ -struct inet_peer *inet_getpeer(__u32 daddr, int create) +struct inet_peer *inet_getpeer(struct nethost *host, __u32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; /* Look up for the address quickly. */ read_lock_bh(&peer_pool_lock); - p = lookup(daddr); + p = lookup(host, daddr); if (p != peer_avl_empty) atomic_inc(&p->refcnt); read_unlock_bh(&peer_pool_lock); @@ -403,10 +413,12 @@ struct inet_peer *inet_getpeer(__u32 dad atomic_set(&n->refcnt, 1); n->ip_id_count = secure_ip_id(daddr); n->tcp_ts_stamp = 0; + n->host = host; + get_host(n->host); write_lock_bh(&peer_pool_lock); /* Check if an entry has suddenly appeared. */ - p = lookup(daddr); + p = lookup(host, daddr); if (p != peer_avl_empty) goto out_free; @@ -428,6 +440,8 @@ out_free: write_unlock_bh(&peer_pool_lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); + /* Drop references to the network host */ + put_host(n->host); /* Free preallocated the preallocated node. 
*/ kmem_cache_free(peer_cachep, n); return p; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e80c03d..ecf73ee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1314,6 +1314,17 @@ void ip_send_reply(struct sock *sk, stru ip_rt_put(rt); } +void ip_host_init(struct ip_host *ihost) +{ + ip_rt_host_init(ihost); + inet_peers_host_init(ihost); +} + +void ip_host_fini(struct ip_host *ihost) +{ + ip_rt_host_fini(ihost); +} + void __init ip_init(void) { ip_rt_init(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 521ce1b..7508079 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1043,7 +1043,7 @@ void rt_bind_peer(struct rtable *rt, int static DEFINE_SPINLOCK(rt_peer_lock); struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer(rt->fl.host, rt->rt_dst, create); spin_lock_bh(&rt_peer_lock); if (rt->peer == NULL) { @@ -3140,6 +3140,16 @@ static int __init set_rhash_entries(char } __setup("rhash_entries=", set_rhash_entries); +void ip_rt_host_init(struct ip_host *ihost) +{ + ip_fib_host_init(ihost); +} + +void ip_rt_host_fini(struct ip_host *ihost) +{ + ip_fib_host_fini(ihost); +} + int __init ip_rt_init(void) { int rc = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 430a4fd..20bb90b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -143,7 +144,7 @@ static int __tcp_v4_check_established(st sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, sk->sk_host)) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -1352,7 +1353,7 @@ int tcp_v4_remember_stamp(struct sock *s int release_it = 0; if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); + peer = 
inet_getpeer(sk->sk_host, inet->daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -1377,7 +1378,7 @@ int tcp_v4_remember_stamp(struct sock *s int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_host, tw->tw_daddr, 1); if (peer) { const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -1640,7 +1641,7 @@ static void *established_get_next(struct tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family) { + while (tw && ((tw->tw_family != st->family) || (tw->tw_host != st->host))) { tw = tw_next(tw); } if (tw) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 20994f7..85fa8b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -342,6 +342,7 @@ static int __tcp_v6_check_established(st if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && + sk2->sk_host == sk->sk_host && ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { -- 1.0.GIT