? mdadm/disk0
? mdadm/disk1
? mdadm/disk2
? mdadm/disk3
? mdadm/disk4
? mdadm/disk5
? mdadm/makeme.sh
? mdadm/md.man
? mdadm/mdadm
? mdadm/mdadm.conf.man
? mdadm/mdadm.man
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/Create.c,v
retrieving revision 1.1.1.2
retrieving revision 1.5
diff -u -r1.1.1.2 -r1.5
--- mdadm/Create.c	21 Dec 2003 07:44:54 -0000	1.1.1.2
+++ mdadm/Create.c	21 Dec 2003 09:09:33 -0000	1.5
@@ -81,9 +81,19 @@
 			Name ": a number of --raid-devices must be given to create an array\n");
 		return 1;
 	}
+	if (raiddisks < 4 && level == 6) {
+		fprintf(stderr,
+			Name ": at least 4 raid-devices needed for level 6\n");
+		return 1;
+	}
+	if (raiddisks > 256 && level == 6) {
+		fprintf(stderr,
+			Name ": no more than 256 raid-devices supported for level 6\n");
+		return 1;
+	}
 	if (raiddisks < 2 && level >= 4) {
 		fprintf(stderr,
-			Name ": atleast 2 raid-devices needed for level 4 or 5\n");
+			Name ": at least 2 raid-devices needed for level 4 or 5\n");
 		return 1;
 	}
 	if (raiddisks+sparedisks > MD_SB_DISKS) {
@@ -108,6 +118,7 @@
 		layout = 0;
 		break;
 	case 5:
+	case 6:
 		layout = map_name(r5layout, "default");
 		if (verbose)
 			fprintf(stderr,
@@ -118,6 +129,7 @@
 	switch(level) {
 	case 4:
 	case 5:
+	case 6:
 	case 0:
 	case -1: /* linear */
 		if (chunk == 0) {
@@ -229,12 +241,19 @@

 	/* If this is raid5, we want to configure the last active slot
 	 * as missing, so that a reconstruct happens (faster than re-parity)
+	 * FIX: Can we do this for raid6 as well?
 	 */
-	if (force == 0 && level == 5 && first_missing >= raiddisks) {
-		insert_point = raiddisks-1;
-		sparedisks++;
-		array.active_disks--;
-		missing_disks++;
+	if (force == 0 && first_missing >= raiddisks) {
+		switch ( level ) {
+		case 5:
+			insert_point = raiddisks-1;
+			sparedisks++;
+			array.active_disks--;
+			missing_disks++;
+			break;
+		default:
+			break;
+		}
 	}

 	/* Ok, lets try some ioctls */
@@ -249,8 +268,10 @@
 	if (fstat(mdfd, &stb)==0)
 		array.md_minor = MINOR(stb.st_rdev);
 	array.not_persistent = 0;
-	if (level == 5 && (insert_point < raiddisks || first_missing < raiddisks))
-		array.state = 1; /* clean, but one drive will be missing */
+	/*** FIX: Need to do something about RAID-6 here ***/
+	if ( (level == 5 || level == 6) &&
+	     (insert_point < raiddisks || first_missing < raiddisks) )
+		array.state = 1; /* clean, but one+ drive will be missing */
 	else
 		array.state = 0; /* not clean, but no errors */

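
The arithmetic behind the new bounds checks: with dual redundancy, an
N-device RAID-6 array stores N-2 devices worth of data, so 4 devices is
the smallest level-6 array in which data is still striped across more
than one drive.  Below is a minimal standalone sketch of that capacity
rule; the helper name data_disks() is invented for illustration and is
not part of mdadm.

#include <stdio.h>

/* Hypothetical helper: data-bearing devices for a given RAID level,
 * following the rules described in this patch and in md.4:
 * level 4/5 sacrifice one device to parity, level 6 sacrifices two. */
static int data_disks(int level, int raid_disks)
{
	switch (level) {
	case 4:
	case 5:
		return raid_disks - 1;
	case 6:
		return raid_disks - 2;
	default:
		return raid_disks;
	}
}

int main(void)
{
	int n;
	for (n = 4; n <= 8; n++)
		printf("raid6 with %d devices stores %d devices worth of data\n",
		       n, data_disks(6, n));
	return 0;
}

At n = 4 this yields 2 data devices, the smallest non-degenerate
configuration, which is why the check above rejects anything less.
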
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/ReadMe.c,v
retrieving revision 1.1.1.2
retrieving revision 1.4
diff -u -r1.1.1.2 -r1.4
--- mdadm/ReadMe.c	21 Dec 2003 07:44:54 -0000	1.1.1.2
+++ mdadm/ReadMe.c	21 Dec 2003 07:50:52 -0000	1.4
@@ -112,7 +112,7 @@
     /* For create or build: */
     {"chunk", 1, 0, 'c'},
     {"rounding", 1, 0, 'c'}, /* for linear, chunk is really a rounding number */
-    {"level", 1, 0, 'l'}, /* 0,1,4,5,linear */
+    {"level", 1, 0, 'l'}, /* 0,1,4,5,6,linear */
     {"parity", 1, 0, 'p'}, /* {left,right}-{a,}symmetric */
     {"layout", 1, 0, 'p'},
     {"raid-disks",1, 0, 'n'},
@@ -205,12 +205,12 @@
 "  For create or build:\n"
 "  --chunk=      -c   : chunk size of kibibytes\n"
 "  --rounding=        : rounding factor for linear array (==chunk size)\n"
-"  --level=      -l   : raid level: 0,1,4,5,linear,mp. 0 or linear for build\n"
-"  --parity=     -p   : raid5 parity algorithm: {left,right}-{,a}symmetric\n"
+"  --level=      -l   : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n"
+"  --parity=     -p   : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
 "  --layout=          : same as --parity\n"
 "  --raid-devices= -n : number of active devices in array\n"
 "  --spare-devices= -x: number of spares (eXtras) devices in initial array\n"
-"  --size=       -z   : Size (in K) of each drive in RAID1/4/5 - optional\n"
+"  --size=       -z   : Size (in K) of each drive in RAID1/4/5/6 - optional\n"
 "  --force       -f   : Honour devices as listed on command line. Don't\n"
 "                     : insert a missing drive for RAID5.\n"
 "\n"
@@ -270,12 +270,12 @@
 "  Options that are valid with --create (-C) are:\n"
 "  --chunk=      -c   : chunk size of kibibytes\n"
 "  --rounding=        : rounding factor for linear array (==chunk size)\n"
-"  --level=      -l   : raid level: 0,1,4,5,linear,multipath and synonyms\n"
-"  --parity=     -p   : raid5 parity algorithm: {left,right}-{,a}symmetric\n"
+"  --level=      -l   : raid level: 0,1,4,5,6,linear,multipath and synonyms\n"
+"  --parity=     -p   : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
 "  --layout=          : same as --parity\n"
 "  --raid-devices= -n : number of active devices in array\n"
 "  --spare-devices= -x: number of spares (eXtras) devices in initial array\n"
-"  --size=       -z   : Size (in K) of each drive in RAID1/4/5 - optional\n"
+"  --size=       -z   : Size (in K) of each drive in RAID1/4/5/6 - optional\n"
 "  --force       -f   : Honour devices as listed on command line. Don't\n"
 "                     : insert a missing drive for RAID5.\n"
 "  --run         -R   : insist of running the array even if not all\n"
@@ -480,6 +480,8 @@
 	{ "5", 5},
 	{ "multipath", -4},
 	{ "mp", -4},
+	{ "raid6", 6},
+	{ "6", 6},
 	{ NULL, 0}
 };
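
The new table entries feed mdadm's name-to-number lookup, so that both
--level=raid6 and --level=6 resolve to level 6.  A simplified
stand-in follows (mdadm's actual struct and function signatures may
differ); the -10 sentinel for an unrecognized name is the value that
mdadm.c tests for below.

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for mdadm's level-name table. */
struct mapping { const char *name; int num; };

static struct mapping pers[] = {
	{ "linear", -1 },
	{ "raid0", 0 }, { "0", 0 },
	{ "raid1", 1 }, { "1", 1 },
	{ "raid4", 4 }, { "4", 4 },
	{ "raid5", 5 }, { "5", 5 },
	{ "multipath", -4 }, { "mp", -4 },
	{ "raid6", 6 }, { "6", 6 },	/* the entries this patch adds */
	{ NULL, 0 }
};

/* Linear search; -10 is the "not understood" sentinel that
 * mdadm.c checks after calling map_name(). */
static int map_name(struct mapping *map, const char *name)
{
	for (; map->name; map++)
		if (strcmp(map->name, name) == 0)
			return map->num;
	return -10;
}

int main(void)
{
	printf("raid6 -> %d, 6 -> %d, bogus -> %d\n",
	       map_name(pers, "raid6"), map_name(pers, "6"),
	       map_name(pers, "bogus"));
	return 0;
}
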
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/md.4,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- mdadm/md.4	21 Dec 2003 07:44:55 -0000	1.1.1.2
+++ mdadm/md.4	21 Dec 2003 08:03:36 -0000	1.2
@@ -15,9 +15,12 @@
 .PP
 .B md
 supports RAID levels 1 (mirroring) 4 (striped array with parity
-device) and 5 (striped array with distributed parity information).
-If a single underlying device fails while using one of these levels,
-the array will continue to function.
+device), 5 (striped array with distributed parity information) and 6
+(striped array with distributed dual redundancy information). If
+some number of underlying devices fail while using one of these
+levels, the array will continue to function; this number is one for
+RAID levels 4 and 5, two for RAID level 6, and all but one (N-1) for
+RAID level 1.
 .PP
 .B md
 also supports a number of pseudo RAID (non-redundant) configurations
@@ -140,6 +143,16 @@
 This also allows more parallelism when reading as read requests are
 distributed over all the devices in the array instead of all but one.

+.SS RAID6
+
+RAID6 is similar to RAID5, but can handle the loss of any \fItwo\fP
+devices without data loss. Accordingly, it requires N+2 drives to
+store N drives worth of data.
+
+The performance for RAID6 is slightly lower than, but comparable to,
+RAID5 in normal mode and single disk failure mode. It is very slow
+in dual disk failure mode, however.
+
 .SS MUTIPATH

 MULTIPATH is not really a RAID at all as there is only one real device
@@ -156,7 +169,7 @@

 .SS UNCLEAN SHUTDOWN

-When changes are made to a RAID1, RAID4, or RAID5 array there is a
+When changes are made to a RAID1, RAID4, RAID5 or RAID6 array there is a
 possibility of inconsistency for short periods of time as each update
 requires are least two block to be written to different devices, and
 these writes probably wont happen at exactly the same time.
@@ -166,33 +179,32 @@

 To handle this situation, the md driver marks an array as "dirty"
 before writing any data to it, and marks it as "clean" when the array
-is being disabled, e.g. at shutdown.
-If the md driver finds an array to be dirty at startup, it proceeds to
-correct any possibly inconsistency. For RAID1, this involves copying
-the contents of the first drive onto all other drives.
-For RAID4 or RAID5 this involves recalculating the parity for each
-stripe and making sure that the parity block has the correct data.
-This process, known as "resynchronising" or "resync" is performed in
-the background. The array can still be used, though possibly with
-reduced performance.
-
-If a RAID4 or RAID5 array is degraded (missing one drive) when it is
-restarted after an unclean shutdown, it cannot recalculate parity, and
-so it is possible that data might be undetectably corrupted.
-The 2.4 md driver
+is being disabled, e.g. at shutdown. If the md driver finds an array
+to be dirty at startup, it proceeds to correct any possible
+inconsistency. For RAID1, this involves copying the contents of the
+first drive onto all other drives. For RAID4, RAID5 and RAID6 this
+involves recalculating the parity for each stripe and making sure that
+the parity block has the correct data. This process, known as
+"resynchronising" or "resync", is performed in the background. The
+array can still be used, though possibly with reduced performance.
+
+If a RAID4, RAID5 or RAID6 array is degraded (missing at least one
+drive) when it is restarted after an unclean shutdown, it cannot
+recalculate parity, and so it is possible that data might be
+undetectably corrupted. The 2.4 md driver
 .B does not
 alert the operator to this condition. The 2.5 md driver will fail to
 start an array in this condition without manual intervention.
 .SS RECOVERY

-If the md driver detects any error on a device in a RAID1, RAID4, or
-RAID5 array, it immediately disables that device (marking it as faulty)
-and continues operation on the remaining devices. If there is a spare
-drive, the driver will start recreating on one of the spare drives the
-data what was on that failed drive, either by copying a working drive
-in a RAID1 configuration, or by doing calculations with the parity
-block on RAID4 and RAID5.
+If the md driver detects any error on a device in a RAID1, RAID4,
+RAID5 or RAID6 array, it immediately disables that device (marking it
+as faulty) and continues operation on the remaining devices. If there
+is a spare drive, the driver will start recreating on one of the spare
+drives the data that was on that failed drive, either by copying a
+working drive in a RAID1 configuration, or by doing calculations with
+the parity block on RAID4, RAID5 or RAID6.

 While this recovery process is happening, the md driver will monitor
 accesses to the array and will slow down the rate of recovery if other
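
The resync step md.4 describes is, for the single-parity levels, a
plain XOR across the data blocks of each stripe; RAID-6 additionally
maintains a second, differently computed redundancy block, which this
sketch does not attempt. A minimal illustration of the XOR step
(buffer sizes and names are invented for the example):

#include <stdio.h>
#include <string.h>

#define NDATA      3	/* data blocks per stripe (example value) */
#define BLOCK_SIZE 8	/* tiny blocks, for illustration only */

/* Recompute the parity block of one stripe as the XOR of its data
 * blocks -- the "recalculating the parity for each stripe" step that
 * md.4 describes for RAID4/RAID5 resync. */
static void recalc_parity(unsigned char data[NDATA][BLOCK_SIZE],
			  unsigned char parity[BLOCK_SIZE])
{
	int d, i;
	memset(parity, 0, BLOCK_SIZE);
	for (d = 0; d < NDATA; d++)
		for (i = 0; i < BLOCK_SIZE; i++)
			parity[i] ^= data[d][i];
}

int main(void)
{
	unsigned char data[NDATA][BLOCK_SIZE] = {
		"block0", "block1", "block2"
	};
	unsigned char parity[BLOCK_SIZE];
	unsigned char rebuilt[BLOCK_SIZE];
	int i;

	recalc_parity(data, parity);

	/* With XOR parity, any one missing data block can be rebuilt
	 * by XOR-ing the parity with the surviving data blocks. */
	for (i = 0; i < BLOCK_SIZE; i++)
		rebuilt[i] = parity[i] ^ data[1][i] ^ data[2][i];
	printf("rebuilt block0: %s\n", (char *)rebuilt);
	return 0;
}

This also shows why a degraded array cannot resync after an unclean
shutdown: with a block missing, the parity is needed for reconstruction
and can no longer be independently verified.
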
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/mdadm.8,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- mdadm/mdadm.8	21 Dec 2003 07:44:55 -0000	1.1.1.2
+++ mdadm/mdadm.8	21 Dec 2003 08:03:36 -0000	1.2
@@ -29,6 +29,7 @@
 (mirroring),
 .BR RAID4 ,
 .BR RAID5 ,
+.BR RAID6 ,
 and
 .BR MULTIPATH .

@@ -109,9 +110,9 @@
 .TP
 .B "Follow or Monitor"
 Monitor one or more md devices and act on any state changes. This is
-only meaningful for raid1, raid5 or multipath arrays as only these have
-interesting state. raid0 or linear never have missing, spare, or
-failed drives, so there is nothing to monitor.
+only meaningful for raid1, 4, 5, 6 or multipath arrays as
+only these have interesting state. raid0 or linear never have
+missing, spare, or failed drives, so there is nothing to monitor.

 .SH OPTIONS

@@ -234,8 +235,8 @@
 .BR -l ", " --level=
 Set raid level. When used with
 .IR --create ,
-options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid5, 4,
-raid5, 5, multipath, mp. Obviously some of these are synonymous.
+options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid4, 4,
+raid5, 5, raid6, 6, multipath, mp. Obviously some of these are synonymous.

 When used with
 .IR --build ,
@@ -279,7 +280,7 @@

 .TP
 .BR -z ", " --size=
-Amount (in Kibibytes) of space to use from each drive in RAID1/4/5.
+Amount (in Kibibytes) of space to use from each drive in RAID1/4/5/6.
 This must be a multiple of the chunk size, and must leave about 128Kb
 of space at the end of the drive for the RAID superblock.
 If this is not specified
@@ -532,7 +533,7 @@
 is not given and insufficient drives were listed to start a complete
 (non-degraded) array, then the array is not started (to guard against
 usage errors). To insist that the array be started in this case (as
-may work for RAID1 or RAID5), give the
+may work for RAID1, 4, 5 or 6), give the
 .B --run
 flag.

@@ -590,7 +591,7 @@
 .B mdadm
 to leave the corresponding slot in the array empty.
 For a RAID4 or RAID5 array at most one slot can be
-"\fBmissing\fP".
+"\fBmissing\fP"; for a RAID6 array at most two slots.
 For a RAID1 array, only one real device needs to be given. All of the
 others can be "\fBmissing\fP".
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/mdadm.c,v
retrieving revision 1.1.1.2
retrieving revision 1.3
diff -u -r1.1.1.2 -r1.3
--- mdadm/mdadm.c	21 Dec 2003 07:44:55 -0000	1.1.1.2
+++ mdadm/mdadm.c	21 Dec 2003 07:46:54 -0000	1.3
@@ -299,6 +299,7 @@
 			exit(2);

 		case 5:
+		case 6:
 			layout = map_name(r5layout, optarg);
 			if (layout==-10) {
 				fprintf(stderr, Name ": layout %s not understood for raid5.\n",
===================================================================
RCS file: /home/hpa/raid6/cvsroot/mdadm/util.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- mdadm/util.c	21 Dec 2003 07:44:55 -0000	1.1.1.2
+++ mdadm/util.c	30 Dec 2003 06:58:48 -0000	1.2
@@ -124,6 +124,8 @@
 	case 4:
 	case 5:
 		return avail_disks >= raid_disks-1;
+	case 6:
+		return avail_disks >= raid_disks-2;
 	default:
 		return 0;
 	}
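
The util.c change encodes the failure-tolerance rule directly: a
RAID-6 array remains startable with up to two devices missing. A
self-contained version of that check, runnable outside mdadm (the real
function in util.c also covers levels this sketch omits, such as
raid0, raid1 and multipath):

#include <stdio.h>

/* Standalone version of the availability rule from util.c's enough():
 * can an array with this many available devices still run? */
static int enough(int level, int raid_disks, int avail_disks)
{
	switch (level) {
	case 4:
	case 5:
		/* single redundancy: survives one missing device */
		return avail_disks >= raid_disks - 1;
	case 6:
		/* dual redundancy: survives two missing devices */
		return avail_disks >= raid_disks - 2;
	default:
		return 0;
	}
}

int main(void)
{
	int avail;
	for (avail = 6; avail >= 3; avail--)
		printf("6-disk raid6 with %d disks available: %s\n",
		       avail, enough(6, 6, avail) ? "runs" : "cannot run");
	return 0;
}

For a 6-disk RAID-6 array this prints "runs" down to 4 available
disks and "cannot run" at 3, matching the two-slot "missing" limit the
mdadm.8 change documents.
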