=================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6.h,v retrieving revision 1.2 diff -u -r1.2 raid6.h --- linux-2.5/drivers/md/raid6.h 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6.h 21 Jan 2004 16:29:56 -0000 @@ -63,6 +63,7 @@ #define __init #define __exit +#define __attribute_const__ __attribute__((const)) #define preempt_enable() #define preempt_disable() =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6algos.c,v retrieving revision 1.2 diff -u -r1.2 raid6algos.c --- linux-2.5/drivers/md/raid6algos.c 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6algos.c 21 Jan 2004 16:42:19 -0000 @@ -46,7 +46,7 @@ &raid6_intx16, &raid6_intx32, #endif -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) &raid6_mmxx1, &raid6_mmxx2, &raid6_sse1x1, @@ -55,6 +55,8 @@ &raid6_sse2x2, #endif #if defined(__x86_64__) + &raid6_sse2x1, + &raid6_sse2x2, &raid6_sse2x4, #endif NULL =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6int.uc,v retrieving revision 1.2 diff -u -r1.2 raid6int.uc --- linux-2.5/drivers/md/raid6int.uc 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6int.uc 21 Jan 2004 16:34:17 -0000 @@ -1,6 +1,6 @@ /* -*- linux-c -*- ------------------------------------------------------- * * - * Copyright 2002 H. Peter Anvin - All Rights Reserved + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,14 +18,46 @@ * This file is postprocessed using unroller.pl */ +#include #include "raid6.h" /* * IA-64 wants insane amounts of unrolling. On other architectures that * is just a waste of space. */ +#if ($# <= 8) || defined(__ia64__) -#if ($# <= 8) || defined(_ia64__) + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { @@ -44,9 +76,8 @@ for ( z = z0-1 ; z >= 0 ; z-- ) { wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; wp$$ ^= wd$$; - w2$$ = wq$$ & NBYTES(0x80); - w1$$ = (wq$$ << 1) & NBYTES(0xfe); - w2$$ = (w2$$ << 1) - (w2$$ >> 7); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); w2$$ &= NBYTES(0x1d); w1$$ ^= w2$$; wq$$ = w1$$ ^ wd$$; =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6mmx.c,v retrieving revision 1.2 diff -u -r1.2 raid6mmx.c --- linux-2.5/drivers/md/raid6mmx.c 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6mmx.c 21 Jan 2004 16:41:40 -0000 @@ -16,7 +16,7 @@ * MMX implementation of RAID-6 syndrome functions */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) #include "raid6.h" #include "raid6x86.h" =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6recov.c,v retrieving revision 1.2 diff -u -r1.2 raid6recov.c --- linux-2.5/drivers/md/raid6recov.c 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6recov.c 21 Jan 2004 16:31:58 -0000 @@ -117,7 +117,7 @@ } else { /* data+Q failure. Reconstruct data from P, then rebuild syndrome. */ - /* FIX */ + /* NOT IMPLEMENTED - equivalent to RAID-5 */ } } else { if ( failb == disks-2 ) { =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6sse1.c,v retrieving revision 1.2 diff -u -r1.2 raid6sse1.c --- linux-2.5/drivers/md/raid6sse1.c 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6sse1.c 21 Jan 2004 16:41:48 -0000 @@ -21,7 +21,7 @@ * worthwhile as a separate implementation. */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) #include "raid6.h" #include "raid6x86.h" =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6x86.h,v retrieving revision 1.2 diff -u -r1.2 raid6x86.h --- linux-2.5/drivers/md/raid6x86.h 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6x86.h 21 Jan 2004 19:50:50 -0000 @@ -1,7 +1,7 @@ #ident "$Id: raid6x86.h,v 1.3 2002/12/12 22:41:27 hpa Exp $" /* ----------------------------------------------------------------------- * * - * Copyright 2002 H. Peter Anvin - All Rights Reserved + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,54 +22,75 @@ #if defined(__i386__) || defined(__x86_64__) +#ifdef __x86_64__ + typedef struct { unsigned int fsave[27]; - unsigned int cr0; -} raid6_mmx_save_t; + unsigned long cr0; +} raid6_mmx_save_t __attribute__((aligned(16))); /* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since the code doesn't know about the additional x86-64 registers */ -/* The +3 is so we can make sure the area is aligned properly */ typedef struct { - unsigned int sarea[8*4+3]; + unsigned int sarea[8*4]; unsigned int cr0; } raid6_sse_save_t __attribute__((aligned(16))); -#ifdef __x86_64__ - /* This is for x86-64-specific code which uses all 16 XMM registers */ typedef struct { - unsigned int sarea[16*4+3]; - unsigned int cr0; + unsigned int sarea[16*4]; + unsigned long cr0; } raid6_sse16_save_t __attribute__((aligned(16))); +/* On x86-64 the stack is 16-byte aligned */ +#define SAREA(x) (x->sarea) + +#else /* __i386__ */ + +typedef struct { + unsigned int fsave[27]; + unsigned long cr0; +} raid6_mmx_save_t; + +/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte + alignment. The +3 is so we have the slack space to manually align + a properly-sized area correctly. */ +typedef struct { + unsigned int sarea[8*4+3]; + unsigned long cr0; +} raid6_sse_save_t; + +#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15)) + #endif #ifdef __KERNEL__ /* Real code */ - static inline u32 raid6_get_fpu(void) +/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */ + +static inline unsigned long raid6_get_fpu(void) { - u32 cr0; + unsigned long cr0; preempt_disable(); - asm volatile("movl %%cr0,%0 ; clts" : "=r" (cr0)); + asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0)); return cr0; } -static inline void raid6_put_fpu(u32 cr0) +static inline void raid6_put_fpu(unsigned long cr0) { - asm volatile("movl %0,%%cr0" : : "r" (cr0)); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); preempt_enable(); } #else /* Dummy code for user space testing */ -static inline u32 raid6_get_fpu(void) +static inline unsigned long raid6_get_fpu(void) { return 0xf00ba6; } -static inline void raid6_put_fpu(u32 cr0) +static inline void raid6_put_fpu(unsigned long cr0) { (void)cr0; } @@ -90,13 +111,8 @@ static inline void raid6_before_sse(raid6_sse_save_t *s) { -#ifdef __x86_64__ - unsigned int *rsa = s->sarea; -#else - /* On i386 the save area may not be aligned */ - unsigned int *rsa = - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); -#endif + unsigned int *rsa = SAREA(s); + s->cr0 = raid6_get_fpu(); asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0])); @@ -111,13 +127,8 @@ static inline void raid6_after_sse(raid6_sse_save_t *s) { -#ifdef __x86_64__ - unsigned int *rsa = s->sarea; -#else - /* On i386 the save area may not be aligned */ - unsigned int *rsa = - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); -#endif + unsigned int *rsa = SAREA(s); + asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0])); asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4])); asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8])); @@ -132,13 +143,8 @@ static inline void raid6_before_sse2(raid6_sse_save_t *s) { -#ifdef __x86_64__ - unsigned int *rsa = &s->sarea; -#else - /* On i386 the save area may not be aligned */ - unsigned int *rsa = - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); -#endif + unsigned int *rsa = SAREA(s); + s->cr0 = raid6_get_fpu(); asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0])); @@ -153,13 +159,8 @@ static inline void raid6_after_sse2(raid6_sse_save_t *s) { -#ifdef __x86_64__ - unsigned int *rsa = s->sarea; -#else - /* On i386 the save area may not be aligned */ - unsigned int *rsa = - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); -#endif + unsigned int *rsa = SAREA(s); + asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8])); @@ -174,9 +175,9 @@ #ifdef __x86_64__ -static inline raid6_before_sse16(raid6_sse16_save_t *s) +static inline void raid6_before_sse16(raid6_sse16_save_t *s) { - unsigned int *rsa = s->sarea; + unsigned int *rsa = SAREA(s); s->cr0 = raid6_get_fpu(); @@ -198,9 +199,9 @@ asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60])); } -static inline raid6_after_sse16(raid6_sse16_save_t *s) +static inline void raid6_after_sse16(raid6_sse16_save_t *s) { - unsigned int *rsa = s->sarea; + unsigned int *rsa = SAREA(s); asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6test/Makefile,v retrieving revision 1.2 diff -u -r1.2 Makefile --- linux-2.5/drivers/md/raid6test/Makefile 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6test/Makefile 21 Jan 2004 16:30:25 -0000 @@ -17,12 +17,10 @@ %.uc: ../%.uc cp -f $< $@ -%.pl: ../%.pl - cp -f $< $@ - all: raid6.o raid6test raid6.o: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ + raid6int32.o \ raid6mmx.o raid6sse1.o raid6sse2.o \ raid6recov.o raid6algos.o \ raid6tables.o @@ -31,26 +29,29 @@ raid6test: raid6.o test.c $(CC) $(CFLAGS) -o raid6test $^ -raid6int1.c: raid6int.uc unroller.pl - $(PERL) ./unroller.pl 1 < raid6int.uc > $@ +raid6int1.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 1 < raid6int.uc > $@ + +raid6int2.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 2 < raid6int.uc > $@ -raid6int2.c: raid6int.uc unroller.pl - $(PERL) ./unroller.pl 2 < raid6int.uc > $@ +raid6int4.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 4 < raid6int.uc > $@ -raid6int4.c: raid6int.uc unroller.pl - $(PERL) ./unroller.pl 4 < raid6int.uc > $@ +raid6int8.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 8 < raid6int.uc > $@ -raid6int8.c: raid6int.uc unroller.pl - $(PERL) ./unroller.pl 8 < raid6int.uc > $@ +raid6int16.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 16 < raid6int.uc > $@ -raid6int16.c: raid6int.uc unroller.pl - $(PERL) ./unroller.pl 16 < raid6int.uc > $@ +raid6int32.c: raid6int.uc ../unroll.pl + $(PERL) ../unroll.pl 32 < raid6int.uc > $@ raid6tables.c: mktables ./mktables > raid6tables.c clean: - rm -f *.o mktables mktables.c raid6int.uc raid6*.c raid6test + rm -f *.o mktables mktables.c raid6int.uc raid6*.c raid6test unroll.pl spotless: clean rm -f *~ =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6test/test.c,v retrieving revision 1.2 diff -u -r1.2 test.c --- linux-2.5/drivers/md/raid6test/test.c 21 Jan 2004 03:11:33 -0000 1.2 +++ linux-2.5/drivers/md/raid6test/test.c 21 Jan 2004 16:30:42 -0000 @@ -73,14 +73,19 @@ erra = memcmp(data[i], recovi, PAGE_SIZE); errb = memcmp(data[j], recovj, PAGE_SIZE); - printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", - raid6_call.name, - i, (i==NDISKS-2)?'P':'D', - j, (j==NDISKS-1)?'Q':(j==NDISKS-2)?'P':'D', - (!erra && !errb) ? "OK" : - !erra ? "ERRB" : - !errb ? "ERRA" : - "ERRAB"); + if ( i < NDISKS-2 && j == NDISKS-1 ) { + /* We don't implement the DQ failure scenario, since it's + equivalent to a RAID-5 failure (XOR, then recompute Q) */ + } else { + printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", + raid6_call.name, + i, (i==NDISKS-2)?'P':'D', + j, (j==NDISKS-1)?'Q':(j==NDISKS-2)?'P':'D', + (!erra && !errb) ? "OK" : + !erra ? "ERRB" : + !errb ? "ERRA" : + "ERRAB"); + } dataptrs[i] = data[i]; dataptrs[j] = data[j];