From: Evgeniy Polyakov Socket notifications. This patch includes socket send/recv/accept notifications. Using a trivial web server based on kevent and these features instead of epoll, its performance increased more than noticeably. More details about various benchmarks and the server itself (evserver_kevent.c) can be found on the project's homepage. Signed-off-by: Evgeniy Polyakov index ada7643..ff1b129 100644 Signed-off-by: Andrew Morton --- fs/inode.c | 7 + include/net/sock.h | 32 ++++--- include/net/tcp.h | 1 kernel/kevent/kevent_socket.c | 135 ++++++++++++++++++++++++++++++++ net/core/sock.c | 10 ++ net/core/stream.c | 1 net/ipv4/tcp_input.c | 1 net/ipv4/tcp_ipv4.c | 2 net/socket.c | 3 9 files changed, 176 insertions(+), 16 deletions(-) diff -puN fs/inode.c~kevent-v23-socket-notifications fs/inode.c --- a/fs/inode.c~kevent-v23-socket-notifications +++ a/fs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -164,12 +165,18 @@ static struct inode *alloc_inode(struct } inode->i_private = NULL; inode->i_mapping = mapping; +#if defined CONFIG_KEVENT_SOCKET + kevent_storage_init(inode, &inode->st); +#endif } return inode; } void destroy_inode(struct inode *inode) { +#if defined CONFIG_KEVENT_SOCKET + kevent_storage_fini(&inode->st); +#endif BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) diff -puN include/net/sock.h~kevent-v23-socket-notifications include/net/sock.h --- a/include/net/sock.h~kevent-v23-socket-notifications +++ a/include/net/sock.h @@ -48,6 +48,7 @@ #include #include /* struct sk_buff */ #include +#include #include @@ -450,6 +451,21 @@ static inline int sk_stream_memory_free( extern void sk_stream_rfree(struct sk_buff *skb); +struct socket_alloc { + struct socket socket; + struct inode vfs_inode; +}; + +static inline struct socket *SOCKET_I(struct inode *inode) +{ + return &container_of(inode, struct socket_alloc, vfs_inode)->socket; +} + +static inline struct inode *SOCK_INODE(struct 
socket *socket) +{ + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb->sk = sk; @@ -477,6 +493,7 @@ static inline void sk_add_backlog(struct sk->sk_backlog.tail = skb; } skb->next = NULL; + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); } #define sk_wait_event(__sk, __timeo, __condition) \ @@ -678,21 +695,6 @@ static inline struct kiocb *siocb_to_kio return si->kiocb; } -struct socket_alloc { - struct socket socket; - struct inode vfs_inode; -}; - -static inline struct socket *SOCKET_I(struct inode *inode) -{ - return &container_of(inode, struct socket_alloc, vfs_inode)->socket; -} - -static inline struct inode *SOCK_INODE(struct socket *socket) -{ - return &container_of(socket, struct socket_alloc, socket)->vfs_inode; -} - extern void __sk_stream_mem_reclaim(struct sock *sk); extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); diff -puN include/net/tcp.h~kevent-v23-socket-notifications include/net/tcp.h --- a/include/net/tcp.h~kevent-v23-socket-notifications +++ a/include/net/tcp.h @@ -857,6 +857,7 @@ static inline int tcp_prequeue(struct so tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, (3 * TCP_RTO_MIN) / 4, diff -puN /dev/null kernel/kevent/kevent_socket.c --- /dev/null +++ a/kernel/kevent/kevent_socket.c @@ -0,0 +1,135 @@ +/* + * kevent_socket.c + * + * 2006 Copyright (c) Evgeniy Polyakov + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int kevent_socket_callback(struct kevent *k) +{ + struct inode *inode = k->st->origin; + unsigned int events = SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, SOCKET_I(inode), NULL); + + if ((events & (POLLIN | POLLRDNORM)) && (k->event.event & (KEVENT_SOCKET_RECV | KEVENT_SOCKET_ACCEPT))) + return 1; + if ((events & (POLLOUT | POLLWRNORM)) && (k->event.event & KEVENT_SOCKET_SEND)) + return 1; + return 0; +} + +int kevent_socket_enqueue(struct kevent *k) +{ + struct inode *inode; + struct socket *sock; + int err = -EBADF; + + sock = sockfd_lookup(k->event.id.raw[0], &err); + if (!sock) + goto err_out_exit; + + inode = igrab(SOCK_INODE(sock)); + if (!inode) + goto err_out_fput; + + err = kevent_storage_enqueue(&inode->st, k); + if (err) + goto err_out_iput; + + err = k->callbacks.callback(k); + if (err) + goto err_out_dequeue; + + return err; + +err_out_dequeue: + kevent_storage_dequeue(k->st, k); +err_out_iput: + iput(inode); +err_out_fput: + sockfd_put(sock); +err_out_exit: + return err; +} + +int kevent_socket_dequeue(struct kevent *k) +{ + struct inode *inode = k->st->origin; + struct socket *sock; + + kevent_storage_dequeue(k->st, k); + + sock = SOCKET_I(inode); + iput(inode); + sockfd_put(sock); + + return 0; +} + +void kevent_socket_notify(struct sock *sk, u32 event) +{ + if (sk->sk_socket) + kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, event); +} + 
+/* + * It is required for network protocols compiled as modules, like IPv6. + */ +EXPORT_SYMBOL_GPL(kevent_socket_notify); + +#ifdef CONFIG_LOCKDEP +static struct lock_class_key kevent_sock_key; + +void kevent_socket_reinit(struct socket *sock) +{ + struct inode *inode = SOCK_INODE(sock); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); +} + +void kevent_sk_reinit(struct sock *sk) +{ + if (sk->sk_socket) { + struct inode *inode = SOCK_INODE(sk->sk_socket); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); + } +} +#endif +static int __init kevent_init_socket(void) +{ + struct kevent_callbacks sc = { + .callback = &kevent_socket_callback, + .enqueue = &kevent_socket_enqueue, + .dequeue = &kevent_socket_dequeue}; + + return kevent_add_callbacks(&sc, KEVENT_SOCKET); +} +module_init(kevent_init_socket); diff -puN net/core/sock.c~kevent-v23-socket-notifications net/core/sock.c --- a/net/core/sock.c~kevent-v23-socket-notifications +++ a/net/core/sock.c @@ -1389,6 +1389,7 @@ static void sock_def_wakeup(struct sock if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_error_report(struct sock *sk) @@ -1398,6 +1399,7 @@ static void sock_def_error_report(struct wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,0,POLL_ERR); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_readable(struct sock *sk, int len) @@ -1407,6 +1409,7 @@ static void sock_def_readable(struct soc wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,1,POLL_IN); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_write_space(struct sock *sk) @@ -1426,6 +1429,7 @@ static void sock_def_write_space(struct } read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, 
KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } static void sock_def_destruct(struct sock *sk) @@ -1476,6 +1480,8 @@ void sock_init_data(struct socket *sock, sk->sk_state = TCP_CLOSE; sk->sk_socket = sock; + kevent_sk_reinit(sk); + sock_set_flag(sk, SOCK_ZAPPED); if(sock) @@ -1542,8 +1548,10 @@ void fastcall release_sock(struct sock * if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&sk->sk_lock.wq)) + if (waitqueue_active(&sk->sk_lock.wq)) { wake_up(&sk->sk_lock.wq); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); + } spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff -puN net/core/stream.c~kevent-v23-socket-notifications net/core/stream.c --- a/net/core/stream.c~kevent-v23-socket-notifications +++ a/net/core/stream.c @@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock * wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, 2, POLL_OUT); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } } diff -puN net/ipv4/tcp_input.c~kevent-v23-socket-notifications net/ipv4/tcp_input.c --- a/net/ipv4/tcp_input.c~kevent-v23-socket-notifications +++ a/net/ipv4/tcp_input.c @@ -3119,6 +3119,7 @@ static void tcp_ofo_queue(struct sock *s __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) tcp_fin(skb, sk, skb->h.th); diff -puN net/ipv4/tcp_ipv4.c~kevent-v23-socket-notifications net/ipv4/tcp_ipv4.c --- a/net/ipv4/tcp_ipv4.c~kevent-v23-socket-notifications +++ a/net/ipv4/tcp_ipv4.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -870,6 +871,7 @@ int tcp_v4_conn_request(struct sock *sk, reqsk_free(req); } else { inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT); } return 0; diff -puN 
net/socket.c~kevent-v23-socket-notifications net/socket.c --- a/net/socket.c~kevent-v23-socket-notifications +++ a/net/socket.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -484,6 +485,8 @@ static struct socket *sock_alloc(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; + kevent_socket_reinit(sock); + get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); return sock; _