From 2474c63f04582b6f4080d986b1019c0fe3464337 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 21 May 2007 10:18:55 -0400 Subject: [PATCH 3/4] [SCTP] Introduce hashinfo and convert bind hash to use it. Use the inet port allocator with an sctp specific conflict function. Signed-off-by: Vlad Yasevich --- include/net/sctp/sctp.h | 1 + include/net/sctp/structs.h | 33 ++---- net/sctp/endpointola.c | 2 +- net/sctp/protocol.c | 22 ++-- net/sctp/socket.c | 264 ++++++++++--------------------------------- 5 files changed, 85 insertions(+), 237 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index dda72bf..af5e288 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -88,6 +88,7 @@ #include #include #include +#include #include #include diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3b7eddb..906e0a2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -94,20 +94,6 @@ struct crypto_hash; #include #include -/* Structures useful for managing bind/connect. */ - -struct sctp_bind_bucket { - unsigned short port; - unsigned short fastreuse; - struct hlist_node node; - struct hlist_head owner; -}; - -struct sctp_bind_hashbucket { - spinlock_t lock; - struct hlist_head chain; -}; - /* Used for hashing all associations. */ struct sctp_hashbucket { rwlock_t lock; @@ -193,12 +179,6 @@ extern struct sctp_globals { int assoc_hashsize; struct sctp_hashbucket *assoc_hashtable; - /* This is the sctp port control hash. */ - int port_hashsize; - int port_rover; - spinlock_t port_alloc_lock; /* Protects port_rover. */ - struct sctp_bind_hashbucket *port_hashtable; - /* This is the global local address list. * We actively maintain this complete list of addresses on * the system by catching address add/delete events. @@ -214,6 +194,8 @@ extern struct sctp_globals { int prsctp_enable; } sctp_globals; +extern struct inet_hashinfo sctp_hashinfo; + #define sctp_rto_initial (sctp_globals.rto_initial) #define sctp_rto_min (sctp_globals.rto_min) #define sctp_rto_max (sctp_globals.rto_max) @@ -236,14 +218,16 @@ extern struct sctp_globals { #define sctp_ep_hashtable (sctp_globals.ep_hashtable) #define sctp_assoc_hashsize (sctp_globals.assoc_hashsize) #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) -#define sctp_port_hashsize (sctp_globals.port_hashsize) -#define sctp_port_rover (sctp_globals.port_rover) -#define sctp_port_alloc_lock (sctp_globals.port_alloc_lock) -#define sctp_port_hashtable (sctp_globals.port_hashtable) #define sctp_local_addr_list (sctp_globals.local_addr_list) #define sctp_addip_enable (sctp_globals.addip_enable) #define sctp_prsctp_enable (sctp_globals.prsctp_enable) +extern struct inet_hashinfo sctp_hashinfo; + +#define sctp_bucket_cachep (sctp_hashinfo.bind_bucket_cachep) +#define sctp_port_hashsize (sctp_hashinfo.bhash_size) +#define sctp_port_hashtable (sctp_hashinfo.bhash) + /* SCTP Socket type: UDP or TCP style. */ typedef enum { SCTP_SOCKET_UDP = 0, @@ -267,7 +251,6 @@ struct sctp_sock { /* What is our base endpointer? */ struct sctp_endpoint *ep; - struct sctp_bind_bucket *bind_hash; /* Various Socket Options. */ __u16 default_stream; __u32 default_ppid; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1404a9e..cb932c2 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -192,7 +192,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_bind_addr_free(&ep->base.bind_addr); /* Remove and free the port */ - if (sctp_sk(ep->base.sk)->bind_hash) + if (inet_sk(ep->base.sk)->sk_bind_hash) sctp_put_port(ep->base.sk); /* Give up our hold on the sock. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 11e3676..84334ef 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -80,7 +80,14 @@ static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; -struct kmem_cache *sctp_bucket_cachep __read_mostly; + +struct inet_hashinfo sctp_hashinfo __cacheline_aligned = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(sctp_hashinfo.lhash_wait), +}; + +EXPORT_SYMBOL_GPL(sctp_hashinfo); /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) @@ -978,7 +985,7 @@ SCTP_STATIC __init int sctp_init(void) /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", - sizeof(struct sctp_bind_bucket), + sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!sctp_bucket_cachep) @@ -1105,10 +1112,10 @@ SCTP_STATIC __init int sctp_init(void) /* Allocate and initialize the SCTP port hash table. */ do { sctp_port_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_bind_hashbucket); + sizeof(struct inet_bind_hashbucket); if ((sctp_port_hashsize > (64 * 1024)) && order > 0) continue; - sctp_port_hashtable = (struct sctp_bind_hashbucket *) + sctp_port_hashtable = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { @@ -1121,9 +1128,6 @@ SCTP_STATIC __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; - printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); @@ -1197,7 +1201,7 @@ err_proto_register: list_del(&sctp_ipv4_specific.list); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * - sizeof(struct sctp_bind_hashbucket))); + sizeof(struct inet_bind_hashbucket))); err_bhash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: @@ -1252,7 +1256,7 @@ SCTP_STATIC __exit void sctp_exit(void) kfree(sctp_ep_hashtable); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * - sizeof(struct sctp_bind_hashbucket))); + sizeof(struct inet_bind_hashbucket))); sctp_dbg_objcnt_exit(); sctp_proc_exit(); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 742e43d..7aec624 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,8 +107,6 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern struct kmem_cache *sctp_bucket_cachep; - /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { @@ -308,6 +306,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct sctp_bind_addr *bp = &ep->base.bind_addr; + union sctp_addr tmp_addr; struct sctp_af *af; unsigned short snum; int ret = 0; @@ -351,6 +350,12 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; + /* Temporarily put the 'addr' into the sk so we can + * do bind conflict checking on it. + */ + af->from_sk(&tmp_addr, sk); + af->to_sk_saddr(addr, sk); + /* Make sure we are allowed to bind here. * The function sctp_get_port_local() does duplicate address * detection. @@ -358,12 +363,17 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if ((ret = sctp_get_port_local(sk, addr))) { if (ret == (long) sk) { /* This endpoint has a conflicting address. */ - return -EINVAL; + ret = -EINVAL; + goto err; } else { - return -EADDRINUSE; + ret = -EADDRINUSE; + goto err; } } + /* Put the original address back */ + af->to_sk_saddr(&tmp_addr, sk); + /* Refresh ephemeral port. */ if (!bp->port) bp->port = inet_sk(sk)->num; @@ -384,6 +394,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) } return ret; + +err: + af->to_sk_saddr(&tmp_addr, sk); + return ret; } /* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks @@ -4946,161 +4960,51 @@ static void sctp_unhash(struct sock *sk) * link to the socket (struct sock) that uses it, the port number and * a fastreuse flag (FIXME: NPI ipg). */ -static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum); - -static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) +static int sctp_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *pp) { - struct sctp_bind_hashbucket *head; /* hash list */ - struct sctp_bind_bucket *pp; /* hash list port iterator */ + struct sock *sk2; + struct sctp_endpoint *ep2; struct hlist_node *node; - unsigned short snum; - int ret; - - snum = ntohs(addr->v4.sin_port); + int reuse = sk->sk_reuse; + union sctp_addr addr; + + /* Grab the address we are trying to bind from sk */ + sctp_sk(sk)->pf->af->from_sk(&addr, sk); + addr.v4.sin_port = htons(pp->port); + + /* Run through the list of sockets bound to the port + * (pp->port) [via the pointers bind_next and + * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one, + * we get the endpoint they describe and run through + * the endpoint's list of IP (v4 or v6) addresses, + * comparing each of the addresses with the address of + * the socket sk. If we find a match, then that means + * that this port/socket (sk) combination are already + * in an endpoint. + */ + sk_for_each_bound(sk2, node, &pp->owners) { + ep2 = sctp_sk(sk2)->ep; - SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); - sctp_local_bh_disable(); + if (reuse && sk2->sk_reuse && + sk2->sk_state != SCTP_SS_LISTENING) + continue; - if (snum == 0) { - /* Search for an available port. - * - * 'sctp_port_rover' was the last port assigned, so - * we start to search from 'sctp_port_rover + - * 1'. What we do is first check if port 'rover' is - * already in the hash table; if not, we use that; if - * it is, we try next. - */ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - int index; - - sctp_spin_lock(&sctp_port_alloc_lock); - rover = sctp_port_rover; - do { - rover++; - if ((rover < low) || (rover > high)) - rover = low; - index = sctp_phashfn(rover); - head = &sctp_port_hashtable[index]; - sctp_spin_lock(&head->lock); - hlist_for_each_entry(pp, node, &head->chain, node) - if (pp->port == rover) - goto next; + if (sctp_bind_addr_match(&ep2->base.bind_addr, &addr, + sctp_sk(sk))) break; - next: - sctp_spin_unlock(&head->lock); - } while (--remaining > 0); - sctp_port_rover = rover; - sctp_spin_unlock(&sctp_port_alloc_lock); - - /* Exhausted local port range during search? */ - ret = 1; - if (remaining <= 0) - goto fail; - - /* OK, here is the one we will use. HEAD (the port - * hash table list entry) is non-NULL and we hold it's - * mutex. - */ - snum = rover; - } else { - /* We are given an specific port number; we verify - * that it is not being used. If it is used, we will - * exahust the search in the hash list corresponding - * to the port number (snum) - we detect that with the - * port iterator, pp being NULL. - */ - head = &sctp_port_hashtable[sctp_phashfn(snum)]; - sctp_spin_lock(&head->lock); - hlist_for_each_entry(pp, node, &head->chain, node) { - if (pp->port == snum) - goto pp_found; - } } - pp = NULL; - goto pp_not_found; -pp_found: - if (!hlist_empty(&pp->owner)) { - /* We had a port hash table hit - there is an - * available port (pp != NULL) and it is being - * used by other socket (pp->owner not empty); that other - * socket is going to be sk2. - */ - int reuse = sk->sk_reuse; - struct sock *sk2; - struct hlist_node *node; - - SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); - if (pp->fastreuse && sk->sk_reuse && - sk->sk_state != SCTP_SS_LISTENING) - goto success; - - /* Run through the list of sockets bound to the port - * (pp->port) [via the pointers bind_next and - * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one, - * we get the endpoint they describe and run through - * the endpoint's list of IP (v4 or v6) addresses, - * comparing each of the addresses with the address of - * the socket sk. If we find a match, then that means - * that this port/socket (sk) combination are already - * in an endpoint. - */ - sk_for_each_bound(sk2, node, &pp->owner) { - struct sctp_endpoint *ep2; - ep2 = sctp_sk(sk2)->ep; - - if (reuse && sk2->sk_reuse && - sk2->sk_state != SCTP_SS_LISTENING) - continue; - - if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, - sctp_sk(sk))) { - ret = (long)sk2; - goto fail_unlock; - } - } + if (node != NULL) { SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); - } -pp_not_found: - /* If there was a hash table miss, create a new port. */ - ret = 1; - if (!pp && !(pp = sctp_bucket_create(head, snum))) - goto fail_unlock; - - /* In either case (hit or miss), make sure fastreuse is 1 only - * if sk->sk_reuse is too (that is, if the caller requested - * SO_REUSEADDR on this socket -sk-). - */ - if (hlist_empty(&pp->owner)) { - if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) - pp->fastreuse = 1; - else - pp->fastreuse = 0; - } else if (pp->fastreuse && - (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING)) - pp->fastreuse = 0; - - /* We are set, so fill up all the data in the hash table - * entry, tie the socket list information with the rest of the - * sockets FIXME: Blurry, NPI (ipg). - */ -success: - if (!sctp_sk(sk)->bind_hash) { - inet_sk(sk)->num = snum; - sk_add_bind_node(sk, &pp->owner); - sctp_sk(sk)->bind_hash = pp; - } - ret = 0; - -fail_unlock: - sctp_spin_unlock(&head->lock); + return 1; + } else + return 0; +} -fail: - sctp_local_bh_enable(); - return ret; +static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) +{ + return inet_csk_get_port(&sctp_hashinfo, sk, ntohs(addr->v4.sin_port), + sctp_bind_conflict); } /* Assign a 'snum' port to the socket. If snum == 0, an ephemeral @@ -5170,7 +5074,7 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) if (sctp_autobind(sk)) return -EAGAIN; } else - sctp_sk(sk)->bind_hash->fastreuse = 0; + inet_sk(sk)->sk_bind_hash->fastreuse = 0; sctp_hash_endpoint(ep); return 0; @@ -5214,7 +5118,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) if (sctp_autobind(sk)) return -EAGAIN; } else - sctp_sk(sk)->bind_hash->fastreuse = 0; + inet_sk(sk)->sk_bind_hash->fastreuse = 0; sk->sk_max_ack_backlog = backlog; sctp_hash_endpoint(ep); @@ -5346,53 +5250,9 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) * 2nd Level Abstractions ********************************************************************/ -static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum) -{ - struct sctp_bind_bucket *pp; - - pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC); - SCTP_DBG_OBJCNT_INC(bind_bucket); - if (pp) { - pp->port = snum; - pp->fastreuse = 0; - INIT_HLIST_HEAD(&pp->owner); - hlist_add_head(&pp->node, &head->chain); - } - return pp; -} - -/* Caller must hold hashbucket lock for this tb with local BH disabled */ -static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) -{ - if (pp && hlist_empty(&pp->owner)) { - __hlist_del(&pp->node); - kmem_cache_free(sctp_bucket_cachep, pp); - SCTP_DBG_OBJCNT_DEC(bind_bucket); - } -} - -/* Release this socket's reference to a local port. */ -static inline void __sctp_put_port(struct sock *sk) -{ - struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)]; - struct sctp_bind_bucket *pp; - - sctp_spin_lock(&head->lock); - pp = sctp_sk(sk)->bind_hash; - __sk_del_bind_node(sk); - sctp_sk(sk)->bind_hash = NULL; - inet_sk(sk)->num = 0; - sctp_bucket_destroy(pp); - sctp_spin_unlock(&head->lock); -} - void sctp_put_port(struct sock *sk) { - sctp_local_bh_disable(); - __sctp_put_port(sk); - sctp_local_bh_enable(); + inet_put_port(&sctp_hashinfo, sk); } /* @@ -5964,7 +5824,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, { struct sctp_sock *oldsp = sctp_sk(oldsk); struct sctp_sock *newsp = sctp_sk(newsk); - struct sctp_bind_bucket *pp; /* hash list port iterator */ + struct inet_bind_bucket *pp; /* hash list port iterator */ struct sctp_endpoint *newep = newsp->ep; struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; @@ -5985,9 +5845,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - pp = sctp_sk(oldsk)->bind_hash; - sk_add_bind_node(newsk, &pp->owner); - sctp_sk(newsk)->bind_hash = pp; + pp = inet_sk(oldsk)->sk_bind_hash; + sk_add_bind_node(newsk, &pp->owners); + inet_sk(newsk)->sk_bind_hash = pp; inet_sk(newsk)->num = inet_sk(oldsk)->num; /* Copy the bind_addr list from the original endpoint to the new -- 1.5.2.4