Without branch hints, the very unlikely chance of the loop repeating due to cmpxchg failure is unrolled with gcc-4 that I have tested. Improve this for architectures with a native cas/cmpxchg. llsc archs should try to implement this natively. Signed-off-by: Nick Piggin Index: linux-2.6/include/asm-i386/atomic.h =================================================================== --- linux-2.6.orig/include/asm-i386/atomic.h +++ linux-2.6/include/asm-i386/atomic.h @@ -231,8 +231,14 @@ static __inline__ int atomic_sub_return( ({ \ int c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) Index: linux-2.6/include/asm-ia64/atomic.h =================================================================== --- linux-2.6.orig/include/asm-ia64/atomic.h +++ linux-2.6/include/asm-ia64/atomic.h @@ -95,8 +95,14 @@ ia64_atomic64_sub (__s64 i, atomic64_t * ({ \ int c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) Index: linux-2.6/include/asm-m68k/atomic.h =================================================================== --- linux-2.6.orig/include/asm-m68k/atomic.h +++ linux-2.6/include/asm-m68k/atomic.h @@ -175,8 +175,14 @@ static inline void atomic_set_mask(unsig ({ \ int c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) Index: linux-2.6/include/asm-s390/atomic.h =================================================================== --- linux-2.6.orig/include/asm-s390/atomic.h +++ linux-2.6/include/asm-s390/atomic.h @@ -89,10 +89,15 @@ static __inline__ int atomic_cmpxchg(ato static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) { int c, old; - c = atomic_read(v); - while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c) + for (;;) { + if (unlikely(c == u)) + break; + old = atomic_cmpxchg(v, c, c + a); + if (likely(old == c)) + break; c = old; + } return c != u; } @@ -167,10 +172,15 @@ static __inline__ int atomic64_add_unles long long a, long long u) { long long c, old; - c = atomic64_read(v); - while (c != u && (old = atomic64_cmpxchg(v, c, c + a)) != c) + for (;;) { + if (unlikely(c == u)) + break; + old = atomic64_cmpxchg(v, c, c + a); + if (likely(old == c)) + break; c = old; + } return c != u; } Index: linux-2.6/include/asm-sparc64/atomic.h =================================================================== --- linux-2.6.orig/include/asm-sparc64/atomic.h +++ linux-2.6/include/asm-sparc64/atomic.h @@ -78,9 +78,15 @@ extern int atomic64_sub_ret(int, atomic6 ({ \ int c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ - c != (u); \ + } \ + likely(c != (u)); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) Index: linux-2.6/include/asm-x86_64/atomic.h =================================================================== --- linux-2.6.orig/include/asm-x86_64/atomic.h +++ linux-2.6/include/asm-x86_64/atomic.h @@ -405,8 +405,14 @@ static __inline__ long atomic64_sub_retu ({ \ int c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)