From: Greg Banks

Make the nfsd read-ahead params cache more SMP-friendly by changing the
single global list and lock into a fixed 16-bucket hashtable with
per-bucket locks.  This reduces spinlock contention in nfsd_read() on
read-heavy workloads on multiprocessor servers.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each doing 1K
streaming reads at full line rate.  The server had 128 nfsd threads,
which sizes the RA cache at 256 entries, of which only a handful were
used.  Flat profiling shows nfsd_read(), including the inlined
nfsd_get_raparms(), taking 10.4% of each CPU.  This patch drops the
contribution from nfsd() to 1.71% for each CPU.

Signed-off-by: Greg Banks
Signed-off-by: Neil Brown
Signed-off-by: Andrew Morton
---
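
Not part of the patch, purely an illustration of the pattern it introduces:
the sketch below shows a fixed-size hash table with one lock per bucket as
stand-alone user-space C.  The names (struct bucket, bucket_of, get_entry)
are made up for the example, pthread mutexes stand in for kernel spinlocks,
and a trivial XOR hash stands in for the kernel's hash of (dev, ino); the
real change is the raparm_hbucket code in the diff below.

/* Illustration only: per-bucket locks instead of one global lock, so
 * lookups for keys that hash to different buckets never contend. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define HASH_BITS 4
#define HASH_SIZE (1 << HASH_BITS)     /* 16 buckets, as in the patch */
#define HASH_MASK (HASH_SIZE - 1)

struct entry {
	struct entry *next;
	unsigned long dev, ino;        /* lookup key */
};

struct bucket {
	struct entry *head;
	pthread_mutex_t lock;          /* one lock per bucket */
};

static struct bucket table[HASH_SIZE];

static unsigned int bucket_of(unsigned long dev, unsigned long ino)
{
	return (unsigned int)(dev ^ ino) & HASH_MASK;   /* toy hash */
}

/* Find the entry for (dev, ino), inserting it if absent.  Only the
 * owning bucket is serialized; other buckets stay fully concurrent. */
static struct entry *get_entry(unsigned long dev, unsigned long ino)
{
	struct bucket *b = &table[bucket_of(dev, ino)];
	struct entry *e;

	pthread_mutex_lock(&b->lock);
	for (e = b->head; e; e = e->next)
		if (e->dev == dev && e->ino == ino)
			goto out;
	e = calloc(1, sizeof(*e));
	if (e) {
		e->dev = dev;
		e->ino = ino;
		e->next = b->head;
		b->head = e;
	}
out:
	pthread_mutex_unlock(&b->lock);
	return e;
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < HASH_SIZE; i++)
		pthread_mutex_init(&table[i].lock, NULL);
	printf("(8,1234) lives in bucket %u\n", bucket_of(8, 1234));
	return get_entry(8, 1234) ? 0 : 1;
}

Compile with "cc -pthread" to try it; the point is only that threads working
on different buckets never serialize on a single global lock.
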
 fs/nfsd/vfs.c |   60 +++++++++++++++++++++++++++++++++++-------------
 1 files changed, 44 insertions(+), 16 deletions(-)

diff -puN fs/nfsd/vfs.c~knfsd-make-nfsd-readahead-params-cache-smp-friendly fs/nfsd/vfs.c
--- a/fs/nfsd/vfs.c~knfsd-make-nfsd-readahead-params-cache-smp-friendly
+++ a/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
 #include <linux/nfsd_idmap.h>
 #include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
+#include <linux/jhash.h>
 
 #include <asm/uaccess.h>
 
@@ -81,10 +82,19 @@ struct raparms {
 	dev_t			p_dev;
 	int			p_set;
 	struct file_ra_state	p_ra;
+	unsigned int		p_hindex;
 };
 
+struct raparm_hbucket {
+	struct raparms		*pb_head;
+	spinlock_t		pb_lock;
+} ____cacheline_aligned_in_smp;
+
 static struct raparms *		raparml;
-static struct raparms *		raparm_cache;
+#define RAPARM_HASH_BITS	4
+#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
+static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
 
 /*
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp)
  * Obtain the readahead parameters for the file
  * specified by (dev, ino).
  */
-static DEFINE_SPINLOCK(ra_lock);
 
 static inline struct raparms *
 nfsd_get_raparms(dev_t dev, ino_t ino)
 {
 	struct raparms	*ra, **rap, **frap = NULL;
 	int depth = 0;
+	unsigned int	hash;
+	struct raparm_hbucket *rab;
+
+	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
+	rab = &raparm_hash[hash];
 
-	spin_lock(&ra_lock);
-	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+	spin_lock(&rab->pb_lock);
+	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
 		if (ra->p_ino == ino && ra->p_dev == dev)
 			goto found;
 		depth++;
@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 	}
 	depth = nfsdstats.ra_size*11/10;
 	if (!frap) {
-		spin_unlock(&ra_lock);
+		spin_unlock(&rab->pb_lock);
 		return NULL;
 	}
 	rap = frap;
@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 	ra->p_dev = dev;
 	ra->p_ino = ino;
 	ra->p_set = 0;
+	ra->p_hindex = hash;
 found:
-	if (rap != &raparm_cache) {
+	if (rap != &rab->pb_head) {
 		*rap = ra->p_next;
-		ra->p_next = raparm_cache;
-		raparm_cache = ra;
+		ra->p_next = rab->pb_head;
+		rab->pb_head = ra;
 	}
 	ra->p_count++;
 	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-	spin_unlock(&ra_lock);
+	spin_unlock(&rab->pb_lock);
 	return ra;
 }
 
@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 
 	/* Write back readahead params */
 	if (ra) {
-		spin_lock(&ra_lock);
+		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+		spin_lock(&rab->pb_lock);
 		ra->p_ra = file->f_ra;
 		ra->p_set = 1;
 		ra->p_count--;
-		spin_unlock(&ra_lock);
+		spin_unlock(&rab->pb_lock);
 	}
 
 	if (err >= 0) {
@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, 
 void
 nfsd_racache_shutdown(void)
 {
-	if (!raparm_cache)
+	if (!raparml)
 		return;
 	dprintk("nfsd: freeing readahead buffers.\n");
 	kfree(raparml);
-	raparm_cache = raparml = NULL;
+	raparml = NULL;
 }
 /*
  * Initialize readahead param cache
@@ -1846,19 +1862,31 @@ int
 nfsd_racache_init(int cache_size)
 {
 	int	i;
+	int	j = 0;
+	int	nperbucket;
 
-	if (raparm_cache)
+
+	if (raparml)
 		return 0;
+	if (cache_size < 2*RAPARM_HASH_SIZE)
+		cache_size = 2*RAPARM_HASH_SIZE;
 	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
 
 	if (raparml != NULL) {
 		dprintk("nfsd: allocating %d readahead buffers.\n",
 			cache_size);
+		for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+			raparm_hash[i].pb_head = NULL;
+			spin_lock_init(&raparm_hash[i].pb_lock);
+		}
+		nperbucket = cache_size >> RAPARM_HASH_BITS;
 		memset(raparml, 0, sizeof(struct raparms) * cache_size);
 		for (i = 0; i < cache_size - 1; i++) {
-			raparml[i].p_next = raparml + i + 1;
+			if (i % nperbucket == 0)
+				raparm_hash[j++].pb_head = raparml + i;
+			if (i % nperbucket < nperbucket-1)
+				raparml[i].p_next = raparml + i + 1;
 		}
-		raparm_cache = raparml;
 	} else {
 		printk(KERN_WARNING
 		       "nfsd: Could not allocate memory read-ahead cache.\n");
_