From: Nick Piggin Use atomic_inc_not_zero for rcu files instead of special case rcuref. Signed-off-by: Nick Piggin Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton --- dev/null | 220 ------------------------------------------- Documentation/RCU/rcuref.txt | 87 +++++++---------- fs/aio.c | 3 fs/file_table.c | 8 - include/linux/fs.h | 3 kernel/rcupdate.c | 14 -- kernel/rcutorture.c | 1 security/selinux/hooks.c | 2 8 files changed, 47 insertions(+), 291 deletions(-) diff -puN Documentation/RCU/rcuref.txt~rcu-file-use-atomic-primitives Documentation/RCU/rcuref.txt --- devel/Documentation/RCU/rcuref.txt~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/Documentation/RCU/rcuref.txt 2005-12-10 21:50:18.000000000 -0800 @@ -1,74 +1,67 @@ -Refcounter framework for elements of lists/arrays protected by -RCU. +Refcounter design for elements of lists/arrays protected by RCU. Refcounting on elements of lists which are protected by traditional reader/writer spinlocks or semaphores are straight forward as in: -1. 2. -add() search_and_reference() -{ { - alloc_object read_lock(&list_lock); - ... search_for_element - atomic_set(&el->rc, 1); atomic_inc(&el->rc); - write_lock(&list_lock); ... - add_element read_unlock(&list_lock); - ... ... - write_unlock(&list_lock); } +1. 2. +add() search_and_reference() +{ { + alloc_object read_lock(&list_lock); + ... search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + write_lock(&list_lock); ... + add_element read_unlock(&list_lock); + ... ... + write_unlock(&list_lock); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - atomic_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (atomic_dec_and_test(&el->rc)) - kfree(el); - ... + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... 
} If this list/array is made lock free using rcu as in changing the write_lock in add() and delete() to spin_lock and changing read_lock -in search_and_reference to rcu_read_lock(), the rcuref_get in +in search_and_reference to rcu_read_lock(), the atomic_inc in search_and_reference could potentially hold reference to an element which -has already been deleted from the list/array. rcuref_lf_get_rcu takes +has already been deleted from the list/array. atomic_inc_not_zero takes care of this scenario. search_and_reference should look as; 1. 2. add() search_and_reference() { { - alloc_object rcu_read_lock(); - ... search_for_element - atomic_set(&el->rc, 1); if (rcuref_inc_lf(&el->rc)) { - write_lock(&list_lock); rcu_read_unlock(); - return FAIL; - add_element } - ... ... - write_unlock(&list_lock); rcu_read_unlock(); + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) { + write_lock(&list_lock); rcu_read_unlock(); + return FAIL; + add_element } + ... ... + write_unlock(&list_lock); rcu_read_unlock(); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - rcuref_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (rcuref_dec_and_test(&el->rc)) - call_rcu(&el->head, el_free); - ... + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + call_rcu(&el->head, el_free); + ... } Sometimes, reference to the element need to be obtained in the -update (write) stream. In such cases, rcuref_inc_lf might be an overkill -since the spinlock serialising list updates are held. rcuref_inc +update (write) stream. In such cases, atomic_inc_not_zero might be an +overkill since the spinlock serialising list updates are held. atomic_inc is to be used in such cases. 
-For arches which do not have cmpxchg rcuref_inc_lf -api uses a hashed spinlock implementation and the same hashed spinlock -is acquired in all rcuref_xxx primitives to preserve atomicity. -Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the -refcounter atleast at one place. Mixing rcuref_inc and atomic_xxx api -might lead to races. rcuref_inc_lf() must be used in lockfree -RCU critical sections only. + diff -puN fs/aio.c~rcu-file-use-atomic-primitives fs/aio.c --- devel/fs/aio.c~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/fs/aio.c 2005-12-10 21:50:18.000000000 -0800 @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx * /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff -puN fs/file_table.c~rcu-file-use-atomic-primitives fs/file_table.c --- devel/fs/file_table.c~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/fs/file_table.c 2005-12-10 21:50:18.000000000 -0800 @@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) + if (atomic_dec_and_test(&file->f_count)) __fput(file); } @@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!rcuref_inc_lf(&file->f_count)) { + if (!atomic_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigne rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (rcuref_inc_lf(&file->f_count)) + if 
(atomic_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigne void put_filp(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) { + if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff -puN include/linux/fs.h~rcu-file-use-atomic-primitives include/linux/fs.h --- devel/include/linux/fs.h~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/include/linux/fs.h 2005-12-10 21:50:18.000000000 -0800 @@ -9,7 +9,6 @@ #include #include #include -#include /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -655,7 +654,7 @@ extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) rcuref_inc(&(x)->f_count) +#define get_file(x) atomic_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) diff -L include/linux/rcuref.h -puN include/linux/rcuref.h~rcu-file-use-atomic-primitives /dev/null --- devel/include/linux/rcuref.h +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,220 +0,0 @@ -/* - * rcuref.h - * - * Reference counting for elements of lists/arrays protected by - * RCU. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * - * Author: Dipankar Sarma - * Ravikiran Thirumalai - * - * See Documentation/RCU/rcuref.txt for detailed user guide. - * - */ - -#ifndef _RCUREF_H_ -#define _RCUREF_H_ - -#ifdef __KERNEL__ - -#include -#include -#include -#include - -/* - * These APIs work on traditional atomic_t counters used in the - * kernel for reference counting. Under special circumstances - * where a lock-free get() operation races with a put() operation - * these APIs can be used. See Documentation/RCU/rcuref.txt. - */ - -#ifdef __HAVE_ARCH_CMPXCHG - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - atomic_inc(rcuref); -} - -/** - * rcuref_dec - decrement refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - atomic_dec(rcuref); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. 
- * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - return atomic_dec_and_test(rcuref); -} - -/* - * cmpxchg is needed on UP too, if deletions to the list/array can happen - * in interrupt context. - */ - -/** - * rcuref_inc_lf - Take reference to an object in a read-side - * critical section protected by RCU. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. - */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int c, old; - c = atomic_read(rcuref); - while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c) - c = old; - return c; -} - -#else /* !__HAVE_ARCH_CMPXCHG */ - -extern spinlock_t __rcuref_hash[]; - -/* - * Use a hash table of locks to protect the reference count - * since cmpxchg is not available in this arch. - */ -#ifdef CONFIG_SMP -#define RCUREF_HASH_SIZE 4 -#define RCUREF_HASH(k) \ - (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)]) -#else -#define RCUREF_HASH_SIZE 1 -#define RCUREF_HASH(k) &__rcuref_hash[0] -#endif /* CONFIG_SMP */ - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter += 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec - decrement refcount for object. 
- * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter -= 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter--; - if (!rcuref->counter) { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 1; - } else { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 0; - } -} - -/** - * rcuref_inc_lf - Take reference to an object of a lock-free collection - * by traversing a lock-free list/array. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * object is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. 
- */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - if (rcuref->counter) - ret = rcuref->counter++; - else - ret = 0; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return ret; -} - - -#endif /* !__HAVE_ARCH_CMPXCHG */ - -#endif /* __KERNEL__ */ -#endif /* _RCUREF_H_ */ diff -puN kernel/rcupdate.c~rcu-file-use-atomic-primitives kernel/rcupdate.c --- devel/kernel/rcupdate.c~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/kernel/rcupdate.c 2005-12-10 21:50:18.000000000 -0800 @@ -46,7 +46,6 @@ #include #include #include -#include #include /* Definition for rcupdate control block. */ @@ -74,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_d static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; static int maxbatch = 10000; -#ifndef __HAVE_ARCH_CMPXCHG -/* - * We use an array of spinlocks for the rcurefs -- similar to ones in sparc - * 32 bit atomic_t implementations, and a hash function similar to that - * for our refcounting needs. - * Can't help multiprocessors which donot have cmpxchg :( - */ - -spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { - [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED -}; -#endif - /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. 
diff -puN kernel/rcutorture.c~rcu-file-use-atomic-primitives kernel/rcutorture.c --- devel/kernel/rcutorture.c~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/kernel/rcutorture.c 2005-12-10 21:50:18.000000000 -0800 @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff -puN security/selinux/hooks.c~rcu-file-use-atomic-primitives security/selinux/hooks.c --- devel/security/selinux/hooks.c~rcu-file-use-atomic-primitives 2005-12-10 21:50:18.000000000 -0800 +++ devel-akpm/security/selinux/hooks.c 2005-12-10 21:50:18.000000000 -0800 @@ -1689,7 +1689,7 @@ static inline void flush_unauthorized_fi continue; } if (devnull) { - rcuref_inc(&devnull->f_count); + atomic_inc(&devnull->f_count); } else { devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); if (!devnull) { _