From: Joel Schopp

Optimize the fastpath for the new mutex subsystem on PowerPC.

Tested on a 4-core (2 SMT threads/core) Power5 machine with gcc 3.3.2.

Test results from synchro-test.ko (all tests run for the default 5 seconds):

Threads    semaphores    mutexes       mutexes+attached
1          63,465,364    58,404,630    62,109,571
4          58,424,282    35,541,297    37,820,794
8          40,731,668    35,541,297    40,281,768
16         38,372,769    37,256,298    41,751,764
32         38,406,895    36,933,675    38,731,571
64         37,232,017    36,222,480    40,766,379

Signed-off-by: Joel Schopp
Cc: Paul Mackerras
Cc: Benjamin Herrenschmidt
Signed-off-by: Andrew Morton
---

 include/asm-powerpc/mutex.h |   85 ++++++++++++++++++++++++++++++++--
 1 files changed, 80 insertions(+), 5 deletions(-)

diff -puN include/asm-powerpc/mutex.h~powerpc-fastpaths-for-mutex-subsystem include/asm-powerpc/mutex.h
--- devel/include/asm-powerpc/mutex.h~powerpc-fastpaths-for-mutex-subsystem	2006-01-07 14:38:41.000000000 -0800
+++ devel-akpm/include/asm-powerpc/mutex.h	2006-01-07 14:38:41.000000000 -0800
@@ -1,9 +1,84 @@
 /*
- * Pull in the generic implementation for the mutex fastpath.
+ * include/asm-powerpc/mutex.h
  *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
+ * PowerPC optimized mutex locking primitives
+ *
+ * Please look into asm-generic/mutex-xchg.h for a formal definition.
+ * Copyright (C) 2006 Joel Schopp, IBM
  */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+#define __mutex_fastpath_lock(count, fail_fn)\
+do{							\
+	int tmp;					\
+	__asm__ __volatile__(				\
+"1:	lwarx	%0,0,%1\n"				\
+"	addic	%0,%0,-1\n"				\
+"	stwcx.	%0,0,%1\n"				\
+"	bne-	1b\n"					\
+	ISYNC_ON_SMP					\
+	: "=&r" (tmp)					\
+	: "r" (&(count)->counter)			\
+	: "cr0", "memory");				\
+	if (unlikely(tmp < 0))				\
+		fail_fn(count);				\
+} while (0)
+
+#define __mutex_fastpath_unlock(count, fail_fn)\
+do{							\
+	int tmp;					\
+	__asm__ __volatile__(SYNC_ON_SMP		\
+"1:	lwarx	%0,0,%1\n"				\
+"	addic	%0,%0,1\n"				\
+"	stwcx.	%0,0,%1\n"				\
+"	bne-	1b\n"					\
+	: "=&r" (tmp)					\
+	: "r" (&(count)->counter)			\
+	: "cr0", "memory");				\
+	if (unlikely(tmp <= 0))				\
+		fail_fn(count);				\
+} while (0)
+
+
+static inline int
+__mutex_fastpath_trylock(atomic_t* count, int (*fail_fn)(atomic_t*))
+{
+	int tmp;
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%1\n"
+"	cmpwi	0,%0,1\n"
+"	bne-	2f\n"
+"	addic	%0,%0,-1\n"
+"	stwcx.	%0,0,%1\n"
+"	bne-	1b\n"
+"	isync\n"
+"2:"
+	: "=&r" (tmp)
+	: "r" (&(count)->counter)
+	: "cr0", "memory");
+
+	return (int)tmp;
+
+}
+
+#define __mutex_slowpath_needs_to_unlock()		1
 
-#include <asm-generic/mutex-dec.h>
+static inline int
+__mutex_fastpath_lock_retval(atomic_t* count, int (*fail_fn)(atomic_t *))
+{
+	int tmp;
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%1\n"
+"	addic	%0,%0,-1\n"
+"	stwcx.	%0,0,%1\n"
+"	bne-	1b\n"
+"	isync \n"
+	: "=&r" (tmp)
+	: "r" (&(count)->counter)
+	: "cr0", "memory");
+	if (unlikely(tmp < 0))
+		return fail_fn(count);
+	else
+		return 0;
+}
+#endif
_
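
For readers who don't speak lwarx/stwcx., here is a plain C11 sketch of the decrement-based fastpath protocol the asm above implements: count == 1 means unlocked, 0 means locked, and a negative value means locked with contention. The sketch_* names and the spinning slowpaths are invented for this illustration; the real slowpaths live in kernel/mutex.c and queue the task and sleep rather than spin.

	/* Illustrative only -- not part of the patch. */
	#include <stdatomic.h>

	struct sketch_mutex {
		atomic_int count;	/* 1: unlocked, 0: locked, <0: locked, waiters */
	};

	/* Slowpath stand-ins: spin until the holder releases the lock. */
	static void sketch_lock_slowpath(struct sketch_mutex *m)
	{
		/* grab the lock by swapping in -1; an old value of 1 means it was free */
		while (atomic_exchange_explicit(&m->count, -1,
						memory_order_acquire) != 1)
			;	/* busy-wait; the kernel sleeps here instead */
	}

	static void sketch_unlock_slowpath(struct sketch_mutex *m)
	{
		/* mark the mutex free again; the kernel also wakes one waiter */
		atomic_store_explicit(&m->count, 1, memory_order_release);
	}

	static inline void sketch_mutex_lock(struct sketch_mutex *m)
	{
		/* like __mutex_fastpath_lock: atomic decrement with acquire
		 * semantics; a negative result means the lock was already held */
		if (atomic_fetch_sub_explicit(&m->count, 1,
					      memory_order_acquire) - 1 < 0)
			sketch_lock_slowpath(m);
	}

	static inline void sketch_mutex_unlock(struct sketch_mutex *m)
	{
		/* like __mutex_fastpath_unlock: atomic increment with release
		 * semantics; a result <= 0 means someone hit the slowpath
		 * while we held the lock */
		if (atomic_fetch_add_explicit(&m->count, 1,
					      memory_order_release) + 1 <= 0)
			sketch_unlock_slowpath(m);
	}

	int main(void)
	{
		struct sketch_mutex m = { .count = 1 };	/* starts unlocked */

		sketch_mutex_lock(&m);		/* uncontended: fastpath only */
		sketch_mutex_unlock(&m);
		return 0;
	}

The acquire/release orderings in the sketch correspond to the barriers in the patch: ISYNC_ON_SMP after the lwarx/stwcx. loop gives the lock acquire semantics, and SYNC_ON_SMP before the unlock loop gives it release semantics.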