4 # Author : Eduard Bloch ( blade@debian.org )
5 # Last Modified On : Sun, 06 Feb 2005 14:59:51 +0100
6 # Status : Working, but use with caution!
13 use List::Util 'shuffle';
14 use Getopt::Long qw(:config no_ignore_case bundling);
43 "h|help" => \$opt_help,
44 "d|dirhier" => \$opt_dir,
46 "f|filter=s" => \$opt_filter,
47 "F|follow" => \$opt_follow,
48 "e|expmode=i" => \$emode,
49 "o|overhead=i" => \$ofac,
50 "b|blksize=i" => \$bsize,
51 "n|no-act" => \$opt_sim,
52 "m|move" => \$opt_move,
53 "l|symlink" => \$opt_sln,
54 "L|hardlink" => \$opt_ln,
55 "v|verbose" => \$opt_ver,
57 "S|simple" => \$opt_simple,
58 "T|input=s" => \$opt_listfile,
59 "p|prefix=s" => \$prefix,
60 "a|accuracy=i" => \$acc,
61 "H|longhelp" => \$opt_longhelp,
62 "version" => \$get_ver
65 &show_help(1) unless ( GetOptions(%options));
66 &show_help(1) if $opt_help;
67 &show_longhelp if $opt_longhelp;
73 # ignore the old dirhier setting since it is default now and disable the flag when opt_flat is specified
74 $opt_dir = !$opt_flat;
76 $opt_ver = 1 if $opt_sim;
77 $opt_move=1 if ($opt_sln || $opt_ln);
79 # big list @sizes containing the "items" (object sizes)
80 # %names hash mapping "items" (size as key) to arrays with filenames/subarrays for coalesced files
84 # result contains the calculated output. In simple mode, an
85 # array (bins) of atoms (files or filelists). Otherwise, sizes
86 # instead of atoms, to be resolved with %names.
92 # about 400kB for iso headers
99 if(-d $ARGV[0] || (-d readlink($ARGV[0]))) {
100 syswrite(STDOUT,"Building file list, please wait...\n");
101 # save the absolute path before doing anything
102 $inputdir=Cwd::abs_path($ARGV[0]);
105 elsif($opt_listfile) {
106 if($opt_listfile eq "-") {
107 &parseListe(\*STDIN);
110 open(my $in, "<", $opt_listfile) || die "Cannot open list file $opt_listfile\n";
115 die "Error: please specify a directory\n";
118 # check for pointless requests
121 die "Too large object(s) ($_) for the given max size: @{$names{$_}} (maybe coalesced in arrays, check manually)\n" if($_>$max);
126 $acc=1 if ($testsize <= $max); # just generate a list, more trials are pointless
127 print "\nSumm: $testsize\n" if($opt_ver);
128 die "Nothing to do!\n" if($testsize<4096); # looks like just an empty dir
131 syswrite(STDOUT, "Calculating, please wait...\n");
133 $globwaste=$max*@sizes;
135 syswrite(STDOUT,".");
137 #my $waste = bp_bestfit($max, \@in, \@tmp);
138 my $waste = bp_firstfit($max, \@sizes, \@tmp);
139 #print "D: waste - $waste\n";
140 if($waste < $globwaste) {
144 if($starttime && time > $starttime+10) {
145 syswrite(STDOUT,"\nSpent already over 10s (for $_ iterations)\nHint: reduce accuracy to make it faster!\n");
148 @sizes=shuffle(@sizes);
153 print "\nCalculated, using ".(scalar @result)." volumes.\n";
154 print "Wasted: $globwaste Byte (estimated, check mkisofs -print-size ...)\n";
158 my $inDirLen=length($inputdir);
162 open($o, ">$prefix$i.list") if(! ($opt_move || $opt_sim));
163 my $dirPrefix=dirname($prefix);
164 my $prefixBase=basename($prefix);
165 my $dirPrefixAbs=Cwd::abs_path($dirPrefix);
170 # For simple mode, the files/atoms are already resolved, otherwise take
171 # the next with appropriate size.
172 my $item= $opt_simple ? $_ : shift(@{$names{$_}});
174 # make reference point to an array with our files, create a list if needed
175 if(ref($item) eq "ARRAY") {
182 for my $file (@$stuffRef) {
183 my $relFile=substr($file,$inDirLen+1);
184 my $base=basename($relFile);
186 my $targetsubdir = $dirPrefixAbs."/$prefixBase$i";
187 $targetsubdir .= "/".dirname($relFile) if($opt_dir);
188 print "$file -> $targetsubdir/$base\n" if($opt_ver);
190 mkpath $targetsubdir || die "Problems creating $targetsubdir\n";
192 die "Could not create $targetsubdir?\n" if(!(-d $targetsubdir && -w $targetsubdir));
194 symlink($file, "$targetsubdir/$base");
197 if(-d $file && !-l $file) {
198 mkdir "$targetsubdir/$base";
201 link($file, "$targetsubdir/$base");
205 rename($file, "$targetsubdir/$base");
210 # escape = in mkisofs catalogs, they are used as separator
211 my $isoname = ($opt_dir?$relFile : $base);
213 my $sourcefile=$file;
214 $sourcefile=~s/=/\\=/g;
215 print "$i: /$isoname=$sourcefile\n" if $opt_ver;
216 print $o "/$isoname=$sourcefile\n" if(!$opt_sim);
264 # parameter: directory
265 # mode 1: descend as far as possible and index all non-directories
267 # put all files of a dir into a coalesced object, then descend into each dir
274 opendir(DIR, $dir) || die "Could not open $dir\n";
278 @stuff=sort { lc($a) cmp lc($b) } @stuff;
281 foreach my $f (@stuff) {
282 next if ($f eq "." || $f eq "..");
283 #print "\$f=$opt_filter;\n";
285 $f="$dir/$f" if($dir ne ".");
288 next unless (eval("\$f=~$opt_filter;"));
291 if(-l $f && ! $opt_follow) {
303 if( (@dirs + @files) == 0 ) {
304 # this one is empty, register it for cosmetic reasons
305 &insitem(getsize($dir), $dir);
309 # recurse on directories
310 &explore($_) for(@dirs);
312 # and now process files
314 &insitem(getsize($_), $_) for(@files);
317 # handle coalesced objects - first some sanity checks and splitting if
324 # already too large, stop right here
325 die "Too large file ($_) for the given max size $max, aborting...\n";
330 # handle coal. objects becoming too large
332 # too large coal. object...
334 # don't coalesce in this mode, do like mode 1 above, leave them alone
335 &insitem(getsize($_), $_) for(@files);
338 # a bit complicated, split file set while creating coal.objects
341 my @sorted=sort(@files);
343 for(my $i=0;$i<=$#sorted;$i++) {
344 # print "D: i: $i, partsum: $partsum, file: $sorted[$i]\n";
345 my $tmp=getsize($sorted[$i]);
348 # undo the last step then build the coal.object
352 &insitem($partsum, \@tmpvol);
358 push(@tmpvol, $sorted[$i]);
365 # ok, building a coalesced object for simple cases
367 &insitem($filesum, \@files);
375 # args: size, object (filename or list reference)
377 my ($size, $object) = @_;
378 # normally, put the items into the pool for calculation. In simple mode, calculate here
381 push(@{$names{$size}},$object);
384 # now the simplest method to fill the bins, just take a new one when the
385 # object-to-be-added no longer fits
386 if($simpleBinSizes[$simplePos]+$size > $max) {
387 $globwaste += ( $max-$simpleBinSizes[$simplePos] );
390 $simpleBinSizes[$simplePos]+=$size;
391 push( @{$result[$simplePos]}, $object);
398 my $size = ((stat($file))[7]);
399 my $rest = ($size % $bsize);
400 $size = ($size + $bsize - $rest) if ($rest);
401 return 1+int(200 + $ofac*length(basename($file)) + $size);
407 if(/^(\w+)\s+(.+)/) {
408 &insitem(fixnr($1), $2);
416 # optional: default multiplier
419 if($_[0]=~/(\d+)(\D)/) {
423 elsif(defined($_[1])) {
430 return $nr*1000000000 if($fac eq "g");
431 return $nr*1073741824 if($fac eq "G");
432 return $nr*1000000 if($fac eq "m");
433 return $nr*1048576 if($fac eq "M");
434 return $nr*1000 if($fac eq "k");
435 return $nr*1024 if($fac eq "K");
436 return $nr if($fac eq "b");
437 die "$fac is not a valid multiplier!";
443 dirsplit [options] [advanced options] < directory >
445 -H|--longhelp Show the long help message with more advanced options
446 -n|--no-act Only print the commands, no action (implies -v)
447 -s|--size NUMBER - Size of the medium (default: $max)
448 -e|--expmode NUMBER - directory exploration mode (recommended, see long help)
449 -m|--move Move files to target dirs (default: create mkisofs catalogs)
450 -p|--prefix STRING - first part of catalog/directory name (default: vol_)
451 -h|--help Show this option summary
452 -v|--verbose More verbosity
454 The complete help can be displayed with the --longhelp (-H) option.
455 The default mode is creating file catalogs useable with:
456 mkisofs -D -r --joliet-long -graft-points -path-list CATALOG
459 dirsplit -m -s 700M -e2 random_data_to_backup/
467 dirsplit [options] [advanced options] < directory >
468 -n|--no-act Only print the commands, no action (implies -v)
469 -s|--size NUMBER - Size of the medium (default: $max)
470 -m|--move Move files to target dirs (default: create mkisofs catalogs)
471 -l|--symlink similar to -m but just creates symlinks in the target dirs
472 -L|--hardlink like -l but creates hardlinks
473 -p|--prefix STRING - first part of catalog/directory name (default: vol_)
474 -f|--filter EXPR - Filter expression, see examples below and perlre manpage
475 --flat Flat dir mode, don't recreate subdirectory structure (not recommended)
476 -e|--expmode NUMBER, special exploration modes, used with directory argument
478 1: (default) native exploration of the specified directory, but file sizes
479 are rounded up to 2048 blocks plus estimated overhead for
480 filenames (see -o option)
481 2: like 1, but all files in directory are put together (as \"atom\") onto the
482 same medium. This does not apply to subdirectories, however.
483 3: like 2, but don't coalesc files when the size of the \"atom\" becomes too
484 large for the medium size (currently $max)
485 4: like 2, but the max. size of the atoms is limited to $max (storing the
486 rest on another medium)
488 -F|--follow Follow symlinks. Use with care!
489 -b|--blksize NUMBER, block size of the target filesystem (currently $bsize).
490 -o|--overhead NUMBER, overhead caused by directory entries (as factor for the
491 filename length, default: 50, empiricaly found for Joliet+RR
492 with not-so-deep directory structure). Works in exploration
494 -a|--accuracy NUMBER (1=faster, large number=better efficiency, default: 500)
495 -S|--simple Simple/stupid/alphabetic mode
496 -T|--input FILENAME (or - for STDIN): List with sizes and paths, try:
497 find dir -type f -printf \"%s %p\n\"
498 to get an example. Avoid duplicates! Unit suffixes are allowed.
499 -h|--help Show this option summary
500 -v|--verbose More verbosity
502 File sizes are expected to be in bytes, append modifier letters to multiply
503 with a factor, eg 200M (b,k,K,m,M,g,G for Bytes, Kb, KiB, Mb, MiB, Gb, GiB).
504 The default output mode is creating file catalogs useable with
505 mkisofs -D -r --joliet-long -graft-points -path-list CATALOG
508 dirsplit -m -s 120M -e4 largedirwithdata/ -p /zipmedia/backup_ #move stuff into splitted backup dirs
509 dirsplit -s 700M -e2 music/ # make mkisofs catalogs to burn all music to 700M CDRs, keep single files in each dir together
510 dirsplit -s 700M -e2 -f '/other\\/Soundtracks/' music/ # like above, only take files from other/Soundtracks
511 dirsplit -s 700M -e2 -f '!/Thumbs.db|Desktop.ini|\\.m3u\$/i' # like above, ignore some junk files and playlists, both letter cases
513 Bugs: overhead trough blocksize alignment and directory entry storage varies,
514 heavily depends on the target filesystem and configuration (see -b and -o).
516 You should compare the required size of the created catalogs, eg.:
517 for x in *list ; do mkisofs -quiet -D -r --joliet-long -graft-points \\
518 -path-list \$x -print-size; done
519 (output in blocks of 2048 bytes) with the expected size (-s) and media data
520 (cdrecord -v -toc ...).
526 # Parms: bin size (int), input array (arr reference), output array (arr reference)
527 # Returns: wasted space (int)
542 my $bestplace=$#out+1;
544 for($i=0;$i<=$#out;$i++) {
546 $rest=$max-$bel[$i]-$_;
547 if($rest>0 && $rest < $bestwert) {
552 if($bestplace>$#out) {
559 $bel[$bestplace]+=$_;
560 push( @{$out[$bestplace]} , $_);
564 # count all rests but the last one
565 for($i=0;$i<$#out;$i++) {
566 $ret+=($max-$bel[$i]);
572 # Parms: bin size (int), input array (arr reference), output array (arr reference)
573 # Returns: wasted space (int)
581 piece: foreach my $obj (@in) {
582 # first fit, use the first bin with enough free space
583 # print "F: bin$i: $obj, @{$names{$obj}}\n";
584 for($i=0;$i<=$#out;$i++) {
585 my $newsize=($bel[$i]+$obj);
586 # print "bel[i]: $bel[$i], new?: $newsize to max: $max\n";
587 if( $newsize <= $max ) {
588 # print "F: bin$i: $bel[$i]+$obj=$newsize\n";
591 push( @{$out[$i]} , $obj);
598 # print "N: bin$i: $bel[$i]=$obj\n";
603 # sum up all rests except the one from the last bin
604 for($i=0;$i<$#out;$i++) {
605 # print "hm, bel $i ist :".$bel[$i]." und res:".($max-$bel[$i])."\n";
606 $ret+=($max-$bel[$i]);
609 # print "wtf, ".join(",", @{$out[0]})."\n";