From: Dave Jones

In cases where we detect a single bit has been flipped, we spew the usual
slab corruption message, which users instantly think is a kernel bug.  In a
lot of cases, single bit errors are down to bad memory, or other hardware
failure.

This patch adds an extra message to the slab debug output in those cases, in
the hope that users will try memtest before they report a bug.

000: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
Single bit error detected. Probably bad RAM.
Run memtest86+ or a similar memory test tool.

Signed-off-by: Dave Jones
Signed-off-by: Andrew Morton
---

 mm/slab.c |   20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff -puN mm/slab.c~single-bit-flip-detector mm/slab.c
--- a/mm/slab.c~single-bit-flip-detector
+++ a/mm/slab.c
@@ -1683,10 +1683,28 @@ static void poison_obj(struct kmem_cache
 static void dump_line(char *data, int offset, int limit)
 {
 	int i;
+	unsigned char total = 0, bad_count = 0, errors;
 	printk(KERN_ERR "%03x:", offset);
-	for (i = 0; i < limit; i++)
+	for (i = 0; i < limit; i++) {
+		if (data[offset + i] != POISON_FREE) {
+			total += data[offset + i];
+			bad_count++;
+		}
 		printk(" %02x", (unsigned char)data[offset + i]);
+	}
 	printk("\n");
+
+	if (bad_count == 1) {
+		errors = total ^ POISON_FREE;
+		if (errors && !(errors & (errors-1))) {
+			printk(KERN_ERR "Single bit error detected. Probably bad RAM.\n");
+#ifdef CONFIG_X86
+			printk(KERN_ERR "Run memtest86+ or a similar memory test tool.\n");
+#else
+			printk(KERN_ERR "Run a memory test tool.\n");
+#endif
+		}
+	}
 }
 #endif
 
_