Subject: [PATCH] nethost: ipv6 support From: Eric W. Biederman Date: 1134267791 -0700 - modify ipv6 sockets to match on sk_host - modify ipv6_chk_addr to take a nethost so we only allow ip addresses on the current host. - modify ipv6_get_saddr and ipv6_dev_get_saddr to take a nethost so we will only return a source address for the appropriate network. - modify addrconf_dst_alloc which generates destination routes to take a nethost parameter instead of an anycast flag. This does two things. * For local unicast addresses it allows selecting the loopback device for the nethost as the destination. * A nethost is not passed for anycast addresses (it doesn't make sense) so we use the presence of a nethost to set RTF_LOCAL. - Add ifindex_host to return the host of a loopback device if the ifindex specifies a loopback device. - Add ipv6_host to return which host an incoming ipv6 skb is for. - modify datagram_send_ctl to take a nethost so that when setting packet info it has enough information to verify the source address is valid. - Add addrconf_flush_host to remove all nethost state when the corresponding loopback device is unregistered. - Tag udp, raw, and tcp with INET_PROTOSW_MULTIHOST so I know they are safe to use in a multi host context. - modify all code paths that generate a flow for input to ip6_dst_lookup to set iif to the interface index of the loopback device for their nethost. This added information matches the existing model and it makes it available for source address selection. - modify ip6_dst_lookup to special case ipv6 loopback addresses as a destination. In that case it forces the output interface to be the input interface (i.e. the nethost loopback interface). Also compute the saddr filtering by the host obtained from looking at the fl.iif. 
I would use sk->sk_host to find the host instead of fl.iif except there are a few instances where sockets are not available or the socket is a dummy kernel mode socket that always set sk_host to &init_host (which isn't what I want and fl.iif which is set on a per packet basis can easily be more accurate). Of course in the current mode socket case frequently I won't need to run this code anyway as the source address is already set. - Remove the nonsense in ip6_route_add about devices which have dev->flags & IFF_LOOPBACK set and are not the loopback device. --- include/linux/ipv6.h | 5 +- include/linux/nethost.h | 13 +++++ include/net/addrconf.h | 12 ++--- include/net/inet6_hashtables.h | 16 ++++--- include/net/ip6_route.h | 13 ++++- include/net/rawv6.h | 2 - include/net/transp_v6.h | 1 net/ipv6/addrconf.c | 98 +++++++++++++++++++++++++--------------- net/ipv6/af_inet6.c | 5 +- net/ipv6/anycast.c | 4 +- net/ipv6/datagram.c | 5 +- net/ipv6/icmp.c | 7 ++- net/ipv6/inet6_hashtables.c | 11 ++-- net/ipv6/ip6_flowlabel.c | 7 ++- net/ipv6/ip6_output.c | 11 ++++ net/ipv6/ip6_tunnel.c | 4 +- net/ipv6/ipv6_sockglue.c | 2 - net/ipv6/ndisc.c | 4 +- net/ipv6/raw.c | 22 ++++++--- net/ipv6/route.c | 28 +++-------- net/ipv6/tcp_ipv6.c | 23 +++++++-- net/ipv6/udp.c | 17 ++++--- 22 files changed, 192 insertions(+), 118 deletions(-) f3f000d4b002a40f669c2b3b2ece699e3d60f1ec diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e0b9227..927690e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,13 +372,14 @@ static inline struct raw6_sock *raw6_sk( #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif, __host)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, 
(__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) &&\ + ((__host) == (__sk)->sk_host)) #endif /* __KERNEL__ */ diff --git a/include/linux/nethost.h b/include/linux/nethost.h index 9e394b4..48c4e15 100644 --- a/include/linux/nethost.h +++ b/include/linux/nethost.h @@ -85,4 +85,17 @@ static inline struct nethost *loopback_h return host; } +static inline struct nethost *ifindex_host(int ifindex) +{ + struct nethost *host = NULL; + if (ifindex) { + struct net_device *dev = dev_get_by_index(ifindex); + if (dev) { + host = loopback_host(dev); + dev_put(dev); + } + } + return host; +} + #endif /* _LINUX_NETHOST_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 06c3eba..67abfe8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -59,19 +59,19 @@ extern int addrconf_add_ifaddr(void __ extern int addrconf_del_ifaddr(void __user *arg); extern int addrconf_set_dstaddr(void __user *arg); -extern int ipv6_chk_addr(struct in6_addr *addr, +extern int ipv6_chk_addr(struct nethost *host, + struct in6_addr *addr, struct net_device *dev, int strict); -extern int ipv6_host_match(const struct sock *sk, - const int dif, - const struct in6_addr *addr); extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict); -extern int ipv6_get_saddr(struct dst_entry *dst, +extern int ipv6_get_saddr(struct nethost *host, + struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr); -extern int ipv6_dev_get_saddr(struct net_device *dev, +extern int ipv6_dev_get_saddr(struct nethost *host, + struct net_device *dev, struct in6_addr *daddr, struct in6_addr *saddr); extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *); diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 5a2beed..c912a44 100644 --- 
a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -60,7 +60,8 @@ static inline struct sock * const u16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, + struct nethost *host) { struct sock *sk; const struct hlist_node *node; @@ -75,7 +76,7 @@ static inline struct sock * read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif, host)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ @@ -104,26 +105,27 @@ hit: extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, - const int dif); + const int dif, struct nethost *host); static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, + struct nethost *host) { struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, host); if (sk) return sk; - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif, host); } extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 dport, - const int dif); + const int dif, struct nethost *host); #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1f2e428..86cc097 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -14,6 +14,7 @@ #include #include #include +#include struct pol_chain { int type; @@ -77,8 +78,8 @@ extern int ndisc_dst_gc(int *more); extern 
void fib6_force_start_gc(void); extern struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, - int anycast); + struct nethost *host, + const struct in6_addr *addr); /* * support functions for ND @@ -139,5 +140,13 @@ static inline int ipv6_unicast_destinati return rt->rt6i_flags & RTF_LOCAL; } +static inline struct nethost *ipv6_host(struct sk_buff *skb) +{ + struct nethost *host = NULL; + if (ipv6_unicast_destination(skb)) { + host = loopback_host(skb->dst->dev); + } + return host; +} #endif #endif diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 14476a7..ec38534 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -11,7 +11,7 @@ extern int ipv6_raw_deliver(struct sk_bu extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr, - int dif); + int dif, struct nethost *host); extern int rawv6_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 4e86f2d..03afb99 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -35,6 +35,7 @@ extern int datagram_recv_ctl(struct so struct sk_buff *skb); extern int datagram_send_ctl(struct msghdr *msg, + struct nethost *host, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5ffc537..7c34835 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -130,6 +130,7 @@ static DEFINE_SPINLOCK(addrconf_verify_l static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); +static void addrconf_flush_host(struct nethost *host); static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); @@ -515,6 +516,14 @@ ipv6_add_addr(struct nethost *host, stru goto out; } + /* Deny adding addresses owned by other hosts to the loopback device 
*/ + if ((idev->dev->flags & IFF_LOOPBACK) && + (idev->dev != &host->loopback_dev)) { + ADBG(("ipv6_add_addr: wrong host using loopback\n")); + err = -EINVAL; + goto out; + } + ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); if (ifa == NULL) { @@ -523,7 +532,7 @@ ipv6_add_addr(struct nethost *host, stru goto out; } - rt = addrconf_dst_alloc(idev, addr, 0); + rt = addrconf_dst_alloc(idev, host, addr); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto out; @@ -830,7 +839,7 @@ static int inline ipv6_saddr_pref(const #define IPV6_GET_SADDR_MAXSCORE(score) (score) #endif -int ipv6_dev_get_saddr(struct net_device *dev, +int ipv6_dev_get_saddr(struct nethost *host, struct net_device *dev, struct in6_addr *daddr, struct in6_addr *saddr) { struct inet6_ifaddr *ifp = NULL; @@ -848,7 +857,8 @@ int ipv6_dev_get_saddr(struct net_device */ if (dev) { - if (dev->flags & IFF_LOOPBACK) + if ((dev->flags & IFF_LOOPBACK) && + (!host || (dev == &host->loopback_dev))) scope = IFA_HOST; read_lock(&addrconf_lock); @@ -856,6 +866,8 @@ int ipv6_dev_get_saddr(struct net_device if (idev) { read_lock_bh(&idev->lock); for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (host && ifp->host != host) + continue; if (ifp->scope == scope) { if (ifp->flags&IFA_F_TENTATIVE) continue; @@ -899,6 +911,8 @@ int ipv6_dev_get_saddr(struct net_device if (idev) { read_lock_bh(&idev->lock); for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (host && ifp->host != host) + continue; if (ifp->scope == scope) { if (ifp->flags&IFA_F_TENTATIVE) continue; @@ -942,10 +956,10 @@ out: } -int ipv6_get_saddr(struct dst_entry *dst, +int ipv6_get_saddr(struct nethost *host, struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr); + return ipv6_dev_get_saddr(host, dst ? 
((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr); } @@ -984,13 +998,16 @@ static int ipv6_count_addresses(struct i return cnt; } -int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) +int ipv6_chk_addr(struct nethost *host, struct in6_addr *addr, + struct net_device *dev, int strict) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (host && (host != ifp->host)) + continue; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { if (dev == NULL || ifp->idev->dev == dev || @@ -1002,36 +1019,6 @@ int ipv6_chk_addr(struct in6_addr *addr, return ifp != NULL; } -int ipv6_host_match(const struct sock *sk, const int dif, const struct in6_addr *addr) -{ - struct net_device *dev; - struct inet6_dev *idev; - struct inet6_ifaddr *ifp; - int match; - match = 0; - dev = dev_get_by_index(dif); - if (!dev) - goto out; - - read_lock(&addrconf_lock); - if ((idev = __in6_dev_get(dev)) != NULL) { - read_lock_bh(&idev->lock); - for(ifp = idev->addr_list; ifp; ifp=ifp->if_next) { - if (ipv6_addr_equal(&ifp->addr, addr) && - (ifp->host == sk->sk_host)) - { - match = 1; - break; - } - } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - dev_put(dev); -out: - return match; -} - static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) { @@ -2068,8 +2055,10 @@ static int addrconf_notify(struct notifi /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ - case NETDEV_DOWN: case NETDEV_UNREGISTER: + addrconf_flush_host(loopback_host(dev)); + /* fall through */ + case NETDEV_DOWN: /* * Remove all addresses from this interface. 
*/ @@ -2103,6 +2092,41 @@ static struct notifier_block ipv6_dev_no .priority = 0 }; + +static void addrconf_flush_host(struct nethost *host) +{ + struct inet6_ifaddr *ifa, **bifa; + int i; + if (!host) + return; + for (i = 0; i < IN6_ADDR_HSIZE; i++) { +restart: + bifa = &inet6_addr_lst[i]; + read_lock_bh(&addrconf_hash_lock); + while ((ifa = *bifa) != NULL) { + if (ifa->host == host) { + struct inet6_dev *idev; + idev = ifa->idev; + in6_ifa_hold(ifa); + read_unlock_bh(&addrconf_hash_lock); + + ipv6_del_addr(ifa); + + /* If the last address is deleted administratively, + disable IPv6 on this interface. + */ + if (idev->addr_list == NULL) { + addrconf_ifdown(idev->dev, 1); + } + goto restart; + } + bifa = &ifa->lst_next; + } + read_unlock_bh(&addrconf_hash_lock); + + } +} + static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 26b8726..3168359 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -293,7 +293,8 @@ int inet6_bind(struct socket *sock, stru */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(sk->sk_host, &addr->sin6_addr, + dev, 0)) { if (dev) dev_put(dev); err = -EADDRNOTAVAIL; @@ -524,7 +525,7 @@ static struct inet_protosw rawv6_protosw .ops = &inet6_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, + .flags = INET_PROTOSW_REUSE | INET_PROTOSW_MULTIHOST, }; void diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 6b72940..baf401d 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, i return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(addr, NULL, 0)) + if (ipv6_chk_addr(NULL, addr, NULL, 0)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -314,7 +314,7 @@ int 
ipv6_dev_ac_inc(struct net_device *d goto out; } - rt = addrconf_dst_alloc(idev, addr, 1); + rt = addrconf_dst_alloc(idev, NULL, addr); if (IS_ERR(rt)) { kfree(aca); err = PTR_ERR(rt); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc51840..a42d5a4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -148,6 +148,7 @@ ipv4_connected: fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ -480,7 +481,7 @@ int datagram_recv_ctl(struct sock *sk, s return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct msghdr *msg, struct nethost *host, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -533,7 +534,7 @@ int datagram_send_ctl(struct msghdr *msg return -ENODEV; } } - if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) { + if (!ipv6_chk_addr(host, &src_info->ipi6_addr, dev, 0)) { if (dev) dev_put(dev); err = -EINVAL; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b7185fb..ae3f412 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -301,7 +301,7 @@ void icmpv6_send(struct sk_buff *skb, in */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(NULL, &hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* @@ -349,6 +349,7 @@ void icmpv6_send(struct sk_buff *skb, in ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); + fl.iif = skb->dst->dev->ifindex; fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; @@ -453,6 +454,7 @@ static void icmpv6_echo_reply(struct sk_ ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); + fl.iif = skb->dst->dev->ifindex; fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; @@ -556,8 +558,9 @@ static void 
icmpv6_notify(struct sk_buff read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { + struct nethost *host = ipv6_host(skb); while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, - IP6CB(skb)->iif))) { + IP6CB(skb)->iif, host))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4c1d26a..b8581b3 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,7 +23,8 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, + struct nethost *host) { struct sock *sk; const struct hlist_node *node; @@ -46,10 +47,8 @@ struct sock *inet6_lookup_listener(struc continue; score++; } -#if 0 - if (!ipv6_host_match(sk, dif, daddr)) + if (host != sk->sk_host) continue; -#endif if (score == 3) { result = sk; break; @@ -71,12 +70,12 @@ EXPORT_SYMBOL_GPL(inet6_lookup_listener) struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 dport, - const int dif) + const int dif, struct nethost *host) { struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, host); local_bh_enable(); return sk; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 6376153..6cc78e8 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -280,7 +280,8 @@ static int fl6_renew(struct ip6_flowlabe } static struct ip6_flowlabel * -fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p) +fl_create(struct in6_flowlabel_req *freq, struct nethost *host, + char __user *optval, int optlen, int *err_p) { struct ip6_flowlabel *fl; int olen; @@ -314,7 +315,7 @@ 
fl_create(struct in6_flowlabel_req *freq msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(&msg, host, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; @@ -466,7 +467,7 @@ int ipv6_flowlabel_opt(struct sock *sk, if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(&freq, optval, optlen, &err); + fl = fl_create(&freq, sk->sk_host, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 563b442..f2b840e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -711,6 +711,14 @@ int ip6_dst_lookup(struct sock *sk, stru { int err = 0; + /* If the destinaition is the loopback address, use iif to select my + * current loopback interface. The routing code isn't strict about + * matching the interface but matches it if it can, so there is + * no need to consider a requested output interface here. 
+ */ + if (ipv6_addr_type(&fl->fl6_dst) & IPV6_ADDR_LOOPBACK) + fl->oif = fl->iif; + *dst = NULL; if (sk) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -755,7 +763,8 @@ int ip6_dst_lookup(struct sock *sk, stru goto out_err_release; if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); + struct nethost *host = ifindex_host(fl->iif); + err = ipv6_get_saddr(host, *dst, &fl->fl6_dst, &fl->fl6_src); if (err) goto out_err_release; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cf94372..e511209 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -798,10 +798,10 @@ static void ip6_tnl_set_cap(struct ip6_t if (p->link) ldev = dev_get_by_index(p->link); - if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0)) + if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(NULL, laddr, ldev, 0)) l_ok = 0; - if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0)) + if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(NULL, raddr, NULL, 0)) r_ok = 0; if (l_ok && r_ok) { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8567873..f3af4c2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -376,7 +376,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(&msg, sk->sk_host, &fl, opt, &junk, &junk); if (retv) goto done; update: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 305d9ee..9ef06c9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -435,7 +435,7 @@ static void ndisc_send_na(struct net_dev src_addr = solicited_addr; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr)) + if (ipv6_dev_get_saddr(NULL, dev, daddr, &tmpaddr)) return; src_addr = &tmpaddr; } @@ -685,7 +685,7 @@ static void ndisc_solicit(struct neighbo struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && 
ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1)) + if (skb && ipv6_chk_addr(NULL, &skb->nh.ipv6h->saddr, dev, 1)) saddr = &skb->nh.ipv6h->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c9fb268..cbcb51b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -83,7 +83,7 @@ static void raw_v6_unhash(struct sock *s /* Grumble... icmp and ip_input want to get at this... */ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr, - int dif) + int dif, struct nethost *host) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -107,7 +107,7 @@ struct sock *__raw_v6_lookup(struct sock goto found; continue; } - if (!ipv6_host_match(sk, dif, loc_addr)) + if (host && host != sk->sk_host) continue; goto found; } @@ -149,6 +149,7 @@ int ipv6_raw_deliver(struct sk_buff *skb struct in6_addr *saddr; struct in6_addr *daddr; struct sock *sk; + struct nethost *host; int delivered = 0; __u8 hash; @@ -168,7 +169,8 @@ int ipv6_raw_deliver(struct sk_buff *skb if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + host = ipv6_host(skb); + sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif, host); while (sk) { delivered = 1; @@ -180,7 +182,7 @@ int ipv6_raw_deliver(struct sk_buff *skb rawv6_rcv(sk, clone); } sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, - IP6CB(skb)->iif); + IP6CB(skb)->iif, host); } out: read_unlock(&raw_v6_lock); @@ -241,7 +243,7 @@ static int rawv6_bind(struct sock *sk, s v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(sk->sk_host, &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); goto out; @@ -675,6 +677,7 @@ static int rawv6_sendmsg(struct kiocb *i * Get and verify the address. 
*/ memset(&fl, 0, sizeof(fl)); + fl.iif = sk->sk_host->loopback_dev.ifindex; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -743,7 +746,7 @@ static int rawv6_sendmsg(struct kiocb *i memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, sk->sk_host, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1041,6 +1044,7 @@ struct proto rawv6_prot = { #ifdef CONFIG_PROC_FS struct raw6_iter_state { + struct nethost *host; int bucket; }; @@ -1054,7 +1058,8 @@ static struct sock *raw6_get_first(struc for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket) sk_for_each(sk, node, &raw_v6_htable[state->bucket]) - if (sk->sk_family == PF_INET6) + if ((sk->sk_family == PF_INET6) && + (sk->sk_host == state->host)) goto out; sk = NULL; out: @@ -1069,7 +1074,7 @@ static struct sock *raw6_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET6); + } while (sk && ((sk->sk_family != PF_INET6) || (sk->sk_host != state->host))); if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) { sk = sk_head(&raw_v6_htable[state->bucket]); @@ -1165,6 +1170,7 @@ static int raw6_seq_open(struct inode *i struct raw6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; + s->host = current->host; rc = seq_open(file, &raw6_seq_ops); if (rc) goto out_kfree; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 330b3e6..d41dac2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -865,20 +865,6 @@ int ip6_route_add(struct in6_rtmsg *rtms */ if ((rtmsg->rtmsg_flags&RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { - /* hold loopback dev/idev if we haven't done so. 
*/ - if (dev != &init_host.loopback_dev) { - if (dev) { - dev_put(dev); - in6_dev_put(idev); - } - dev = &init_host.loopback_dev; - dev_hold(dev); - idev = in6_dev_get(dev); - if (!idev) { - err = -ENODEV; - goto out; - } - } rt->u.dst.output = ip6_pkt_discard_out; rt->u.dst.input = ip6_pkt_discard; rt->u.dst.error = -ENETUNREACH; @@ -1392,21 +1378,23 @@ static int ip6_pkt_discard_out(struct sk */ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, - int anycast) + struct nethost *host, + const struct in6_addr *addr) { struct rt6_info *rt = ip6_dst_alloc(); + struct net_device *loopback_dev; if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&init_host.loopback_dev); + loopback_dev = host ? &host->loopback_dev : &init_host.loopback_dev; + dev_hold(loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &init_host.loopback_dev; + rt->rt6i_dev = loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -1414,7 +1402,7 @@ struct rt6_info *addrconf_dst_alloc(stru rt->u.dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (!anycast) + if (host) rt->rt6i_flags |= RTF_LOCAL; rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (rt->rt6i_nexthop == NULL) { @@ -1628,7 +1616,7 @@ static int rt6_fill_node(struct sk_buff RTA_PUT(skb, RTA_IIF, 4, &iif); else if (dst) { struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) + if (ipv6_get_saddr(NULL, &rt->u.dst, dst, &saddr_buf) == 0) RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d693cb9..f8f413a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -85,6 +85,7 @@ static inline int tcp_v6_bind_conflict(c /* We must walk 
the whole port owner list in this case. -DaveM */ sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && + sk->sk_host == sk2->sk_host && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && @@ -367,7 +368,7 @@ static int __tcp_v6_check_established(st /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif, sk->sk_host)) goto not_unique; } @@ -617,6 +618,7 @@ static int tcp_v6_connect(struct sock *s ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, (saddr ? saddr : &np->saddr)); + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; @@ -696,7 +698,7 @@ static void tcp_v6_err(struct sk_buff *s __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, - th->source, skb->dev->ifindex); + th->source, skb->dev->ifindex, ipv6_host(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -748,6 +750,7 @@ static void tcp_v6_err(struct sk_buff *s fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ -842,6 +845,7 @@ static int tcp_v6_send_synack(struct soc ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.fl6_flowlabel = 0; + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; @@ -992,6 +996,7 @@ static void tcp_v6_send_reset(struct sk_ buff->csum); fl.proto = IPPROTO_TCP; + fl.iif = skb->dst->dev->ifindex; fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1059,6 +1064,7 @@ static void 
tcp_v6_send_ack(struct sk_bu buff->csum); fl.proto = IPPROTO_TCP; + fl.iif = skb->dst->dev->ifindex; fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1106,7 +1112,8 @@ static struct sock *tcp_v6_hnd_req(struc nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, &skb->nh.ipv6h->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), inet6_iif(skb), + sk->sk_host); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1303,6 +1310,7 @@ static struct sock * tcp_v6_syn_recv_soc final_p = &final; } ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; @@ -1596,7 +1604,7 @@ static int tcp_v6_rcv(struct sk_buff **p sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, &skb->nh.ipv6h->daddr, ntohs(th->dest), - inet6_iif(skb)); + inet6_iif(skb), ipv6_host(skb)); if (!sk) goto no_tcp_socket; @@ -1669,7 +1677,8 @@ do_time_wait: sk2 = inet6_lookup_listener(&tcp_hashinfo, &skb->nh.ipv6h->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), inet6_iif(skb), + sk->sk_host); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); @@ -1707,6 +1716,7 @@ static int tcp_v6_rebuild_header(struct ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ -1754,6 +1764,7 @@ static int tcp_v6_xmit(struct sk_buff *s ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); + fl.iif = sk->sk_host->loopback_dev.ifindex; fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; @@ -2120,7 +2131,7 @@ static struct 
inet_protosw tcpv6_protosw .ops = &inet6_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_MULTIHOST, }; void __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2053c1d..0033678 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -116,6 +116,7 @@ gotit: &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { if (inet_sk(sk2)->num == snum && sk2 != sk && + sk2->sk_host == sk->sk_host && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -154,7 +155,8 @@ static void udp_v6_unhash(struct sock *s } static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, int dif) + struct in6_addr *daddr, u16 dport, int dif, + struct nethost *host) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -188,7 +190,7 @@ static struct sock *udp_v6_lookup(struct continue; score++; } - if (!ipv6_host_match(sk, dif, daddr)) + if (host && host != sk->sk_host) continue; if(score == 4) { result = sk; @@ -336,7 +338,8 @@ static void udpv6_err(struct sk_buff *sk struct sock *sk; int err; - sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex); + sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex, + ipv6_host(skb)); if (sk == NULL) return; @@ -517,7 +520,8 @@ static int udpv6_rcv(struct sk_buff **ps * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. 
*/ - sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex); + sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex, + ipv6_host(skb)); if (sk == NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -716,6 +720,7 @@ do_udp_sendmsg: ulen += sizeof(struct udphdr); memset(fl, 0, sizeof(*fl)); + fl->iif = sk->sk_host->loopback_dev.ifindex; if (sin6) { if (sin6->sin6_port == 0) @@ -764,7 +769,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, sk->sk_host, fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1081,7 +1086,7 @@ static struct inet_protosw udpv6_protosw .ops = &inet6_dgram_ops, .capability =-1, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_MULTIHOST, }; -- 1.0.GIT