#! /usr/bin/perl use strict; use Pod::Usage; use Getopt::Long; use File::Find; # define the line width for the output my $linesize = 40; # define a megabyte: my $Mb = 1024*1024; # some predifined sizes the user can use to set the size to fill my %predef_sizes = ( kb => 1024, mb => $Mb, gb => 1024*$Mb, cd => 700*$Mb, dvd => 4481*$Mb, dvdram => 4579655680, floppy => 1.44*$Mb, ); my @files = (); # the files to pack my $medium_size = $predef_sizes{dvd}; # default medium is a dvd my $tolerance = -1; # initialize at -1 to check if has been defined by user #process options from the command line my ($help,$verbose,$quiet,$units); GetOptions ( "medium-size=s" => \$medium_size, # string "help" => \$help, # switch "verbose" => \$verbose, # switch "quiet" => \$quiet, # switch "tolerance=s" => \$tolerance, # string "units" => \$units #switch ) or pod2usage({-exitval => 1, -verbose => 1}); pod2usage({-exitval => 2, -verbose => 2}) if($help); list_units() if($units); my $max = parse_size($medium_size); # $max will be the target size # $tol will be tolerance on the result: the program will stop as soon as # a size between $max-$tol and $max is reached. By default, the tolerance is # set to 0.01% of the target size $tolerance = 0.0001*$max unless($tolerance>=0); my $tol = parse_size($tolerance); # after the options are stripped from the command line, what is left # tells us what to pack if($#ARGV<0) # no argument => all the files in the current directory { @files = glob("*"); } elsif($#ARGV==0) # 1 argument: the directory in which the files to pack are { $ARGV[0] =~ s![/\\]$!!; # strip the trailing '/' or '\' if(! -d $ARGV[0]) { print $ARGV[0]." is not a directory\n"; pod2usage({-exitval => 4, -verbose => 2}) } @files = glob($ARGV[0]."/*"); } else # more than 1 argument => those are the files to pack { @files = @ARGV; } # initializations: my $totalsize = 0; # the total size of the selected files in the current iteration my $minrest = $max; my @sizes; # the size of each file my @selected; # the status (selected or not) of each file in the current iteration my $i; # loop thru all the files, print if the user asked for verbosity, # and calculates the size of each one print "All files:\n" if($verbose); for($i=0; $i<=$#files; ++$i) { if(-d $files[$i]) #it's a directory => calculate its size by recurrence { $sizes[$i] = 0; find sub{$sizes[$i] += -s $_; }, $files[$i] ; $files[$i] .= " (DIR)"; } else { $sizes[$i] = -s $files[$i]; } $selected[$i] = 0; #initialize the selection array to 0 $totalsize += $sizes[$i]; if($verbose) { my $out = " $files[$i]"; print $out; print " " x ($linesize-length($out)); printf( ": %5d Mb\n",int($sizes[$i]/$Mb+0.5)); } } print "-" x ($linesize+11)."\n" if($verbose); #print some info unless the user asked for quiet if(!$quiet) { my $out = " TOTAL (".($#files+1)." files)" ; print $out. " " x ($linesize-length($out)); printf(": %5d Mb\n",int($totalsize/$Mb+0.5)); $out = " MAXIMUM SIZE TO FILL" ; print $out. " " x ($linesize-length($out)); printf(": %5d Mb\n",int($max/$Mb+0.5)); print "\n Nb combinaisons: ".2**($#files+1)."\n\n"; } my @best = @selected; # the best selection so far if($totalsize<$max) #all the files don't fill the medium => nothing to do { print "All the selected files amount to less than the medium size.\n"; print "You can add all of them to the medium without getting over\n"; print "the size limit. No optimization to be done.\n"; #select all the files @best = map { 1 } @selected; #display result on screen and exit finish(); } # catch Ctrl-C to finish the search and display the best so far $SIG{INT} = \&finish; #start the first iteration: print "Press Ctrl-C when the solution is satisfactory\n" unless $quiet; $|=1; # we want the real-time results displayed on the screen iter(0,$max); finish(); #should never reach this point, finish calls the exit function ################################################################################ # ################################################################################ sub showbest { return if($quiet); print "\b"x50; # backspace characters to go back to the beginning of the line if($minrest>$Mb) { printf("Best solution so far:%8.3f Mb ",$minrest/$Mb); } elsif($minrest>1024) { printf("Best solution so far:%8.3f Kb ",$minrest/1024); } else { printf("Best solution so far:%4d bytes ",$minrest); } } sub finish { # called when we found the best solution my $totalsize = 0; my $nbfiles = 0; my $out; $| = 0; # we don't need the autoflush anymore print "\b"x55 . "RESULTS:". " "x43 ."\n"; for(my $i=0; $i<=$#files; ++$i) { if($best[$i]) { $out = " $files[$i]"; print $out . " " x ($linesize-length($out)); printf( ": %5d Mb\n",int($sizes[$i]/$Mb+0.5)); $totalsize += $sizes[$i]; ++$nbfiles; } } print "-" x ($linesize+11)."\n"; my $out = " TOTAL ($nbfiles files)" ; print $out. " " x ($linesize-length($out)); printf(": %5d Mb\n",int($totalsize/$Mb+0.5)); $out = " MAXIMUM SIZE TO FILL" ; print $out. " " x ($linesize-length($out)); printf(": %5d Mb\n",int($max/$Mb+0.5)); $out = " UNUSED " ; print $out. " " x ($linesize-length($out)); printf(": %5d Mb\n",int($max/$Mb+0.5)-int($totalsize/$Mb+0.5)); exit; } sub iter { # The search is done by iteration on the depth. We have to explore # a tree, each file has 2 possible status: packed or not packed. # So, at each level (file) each tree branch is divided in 2. # Each node on the tree corresponds to a certain number of # files being packed and hence to a certain size. # Obviously, we stop exploring branches where the size is already # larger that the maximum allowed my $i = shift; # current file number or depth my $maxsize = shift; # the size of the medium left to fill if($i>$#files) # no more files to add { # since there are no more files, maxsize is here also the # size of the space left on the medium in this node if($maxsize < $minrest && $maxsize > 0) { # we have a new best solution! $minrest = $maxsize; @best = @selected; showbest(); finish() if($minrest<=$tol); } return 1; } # we now have to test 2 options: pack this file or not: # 1. let's try taking the file i into the package if($sizes[$i]< $maxsize) { $selected[$i] = 1; # we continue to explore the tree of solution with the next file, # but this time the size available is decreased since we just included # a file in the package iter($i+1, $maxsize-$sizes[$i]); } else { # we have reached the maximum, adding the file would make # the package too big. Test if this solution is better than # what we had this far: if($maxsize < $minrest && $maxsize > 0) { # we have a new best solution $minrest = $maxsize; @best = @selected; showbest(); finish() if($minrest<=$tol); } } # 2. let's try the other alternative: we don't take the file i # in the package $selected[$i] = 0; # we continue to explore the tree of solution with the next file iter($i+1, $maxsize); } ################################################################################ # ################################################################################ sub parse_size { # allows the use of units pre defined in %predef_sizes # ex: 2.5Gb, dvd, cd, 35Mb,... my $size = shift; # string to decode # we are going to decompose the string into a numerical factor (=1 if absent) # and a unit (=1 byte if absent). my ($factor,$unit); if($size =~ m/^(\d+\.\d*|\.?\d+)(.*)/) # if $max starts with a number { $factor = $1; $unit = $2; } else # just the unit without multiplying factor, ex: 'cd', 'dvd' { $factor = 1; $unit = $size; } return $factor if($unit eq ""); # no unit, just a number! return $factor * $predef_sizes{lc($unit)} if(exists($predef_sizes{lc($unit)})); # error: should never reach this point return -1; } sub list_units { my ($unit,$value); print "The use of units is NOT case-sensitive.\n"; print "The pre-defined units are:\n"; while(my($unit, $value) = each(%predef_sizes)) { printf("%10s: %10.0f (%7.2f Mb)\n",$unit,$value,$value/$Mb); #print $unit . " "x(10-length($unit)) .": $value } exit; } __END__ =pod =head1 NAME packit.pl - chooses files to pack to avoid wasting space on cd,dvd,... =head1 SYNOPSIS packit.pl [options] [directory] packit.pl [options] file/directory file/directory ... =head1 ARGUMENTS =over 2 =item B<[directory]> The directory in which the files to pack are. If omitted, the current directory is used. The subdirectories are considered as un breakable blocks. To make sure a group of files will be on the same medium, place them in a subdirectory (ex: MyBook-Tome1.pdf, MyBook-Tome2.pdf,...) =item B If more than 1 argument is passed, each one is assumed to be a file or a directory (considered 1 piece) to pack. =back =head1 OPTIONS =over 2 =item B<-h> or B<--help> Produces full documentation. =item B<-m size> or B<--medium-size size> Sets the size of the medium to fill. packit will try to reach this target size without overshooting. The default is the size of a dvd. The format supports the usage of units (run B to see a list of supported units). If no unit is specified, bytes is assumed. ex: 700Mb, dvd, floppy, 3Gb, 12345 =item B<-q> or B<--quiet> Reduces the programs output. =item B<-t size> or B<--tolerance size> The tolerance on the result. For the possible formats of size, see B<--medium-size size>. The tolerance is set by default to 0.01% of the medium-size. For example, if you have a lot of files, the program can take a very long time to test all the possible solutions. When the solution is close enough, it's is not worth waiting a long time just to improve by a few bytes. So you can set the tolerance to e.g. 10Kb, if you think that a solution within 10Kb is good enough. =item B<-u> or B<--units> Display the list of supported units and exit. =item B<-v> or B<--verbose> Prints more information about the set of files we start with. =back =head1 EXAMPLES =over 2 =item packit -t 25kb -m dvd Will try to group some files or directories from the current directory so as to fill a dvd with as little as possible space left on the dvd. The program will stop and display the solution as soon as it finds one leaving less than 25 kilobytes free on the dvd. =back =head1 BUGS Could in principle get into trouble if the number of files is very large since the program uses recurrence. I never had that happening in practice yet. Try increasing the tolerance if it does. =head1 COPYRIGHT Copyright Thomas Materna (http://www.tmaterna.com) Use and redistribution is allowed provided that this copyright and disclaimer is reproduced. This code is provided ``as is'', without B express or implied warranties. In no event should the author be liable for any consequence of any type of use of this software. =head1 AUTHORS Thomas Materna (L) =head1 README B helps saving space when burning CDs, DVDs,... It computes which files/directories to burn on a medium in order to make the unused space on the medium as little as possible. The current best solution is displayed in real time (unless B<--quiet> is used), the user can stop the program at any time when satisfied by hitting Ctrl-C. The program will then display the detail of the best solution and exit. =head1 PREREQUISITES C, C for the documentation, C for the comman line parsing, C to compute a directory's size =head1 OSNAMES any =head1 SCRIPT CATEGORIES CPAN Win32/Utilities =cut