#! /usr/bin/perl #Note to John: Add .dgn aux, rrg,E00, .dxf to file extension list to move ## these directories in the root are ignored @projectsToIgnore = ( '0200', '0400', '0900', '0500', '0300', 'Proposals', 'filecopy', 'Company Stock Pictures', 'Quals', 'SEIgisCommon', 'Templates and Procedures', 'TEMP' ); @moveThese = ( 'aux', 'adf', 'apx', 'atx', 'avl', 'bmp', 'csv', 'dbf', 'dgn', 'dwg', 'dxf', 'e00', 'flt', 'hdr', 'jgw', 'lyr', 'met', 'prj', 'pmf', 'rrd', 'rrg', 'sbn', 'sbx', 'sdc', 'sdi', 'shp', 'shx', 'tfw', 'wld', 'wmf', 'xdc', 'xls', ); @appendThese = ( 'xml', 'prj', 'txt', ); ## calculate the cross product of these two vectors :-) ## all these are marked for moving @tryTheseToo = map { my $a = $_; map { $_ = $a.'.'.$_; } @appendThese; } @moveThese; push @tryTheseToo, 'jpg.xml', 'tif.xml', 'jpg.prj', 'tif.prj'; # these are iffy files, if they appear to be related to moving files, they are moved @testThese = ( 'jpg', 'tif', 'txt', 'apm', ); # if the extension is found, same file name with other exts are all demoted. %demoteTheseEtc = ( 'ai' => ['jpg','pdf','eps', 'tif', 'aux'], 'pdf' => [ 'tif', 'jpg' ], ); # these are always marked to copy @copyTheseFiles = ( 'Picasa.ini', 'scheme.ini', 'xtdefaults.txt', 'metadata.xml', 'arc.dir', ); # if the file root contains these words, promote it @promoteFileNames = ( 'GlobeXplorer' ); @demoteIfDirectoryContains = ( 'EIR' ); @promoteIfDirectoryContains = ( 'lnk', 'subarea', 'streets', 'raster', 'shapefiles', 'geodatabase', ); @promoteIfAnyDirectoryContains = ( 'gis', 'gisfiles', 'datadepot', 'arcview themes', ); my $startDir = '/cygdrive/g/'; my $destDir = '/cygdrive/q/'; my $d; ################################################################## use FileMirror; opendir DH, $startDir; while( $d = readdir DH ) { next if ($d =~ /^\./); if (! (grep $d eq $_, @projectsToIgnore) ) { visitFolders( $startDir.$d, \&analyzeAndMove ); } } closedir DH; sub visitFolders { my $dir = shift @_; my $funcRef = shift @_; my $d; $dir=$dir.'/' if $dir!~m#/$#; ## append trailing slash #print $dir; opendir(DF, $dir) || die("can't opendir $dir"); my @subDirs = grep { $_ !~ /^\./ && -d $dir.$_ && (! -l $dir.$_) } readdir DF; closedir DF; #print @subDirs; foreach my $d (@subDirs) { visitFolders( $dir.$d, $funcRef ); } &$funcRef( $dir.$d ); } sub analyzeAndReport($) { my $dir = shift @_; my %result = analyze($dir); return if (%result==0); ## do nothing if empty dir my $drivedir = $dir; $drivedir =~ s/\/cygdrive\/(.)\//:\\/; $drivedir = ucfirst($1).$drivedir; $drivedir =~ s/\//\\/g; print "*** $drivedir\n"; my $score; foreach my $key (keys %result) { $score = $result{$key}; print " " if ($score>0); print " " if ($score>1); print (' ' x $score); if ($score<0) { $score='o'; } print "$score, $key\n"; } print "\n\n"; } sub analyze($) { $dir = shift @_; #get all the files in the directory my %files = (); my %roots = (); my $printdebug = 1; opendir DF, $dir; while( $f = readdir DF ) { if ( ! -d $dir.$f and $f =~ /^[^.].+?\..{1,3}$/ ) { $files{$f} = 0; } } closedir DF; #get all the ROOTs of filenames in the directory foreach my $f (keys(%files)) { $roots{ lc(($f =~ /(.+?)\.[a-z][a-z0-9.]*$/)[0]) } += 1; } #print join( ' - ', %roots ); #for each file # test against the extensions, and plus the suffixes, add 1 to score # if ext plus ext.suffix exists, add 1 to both # test against the must-copy filenames, add 2 to score # test if it's a possible mover # does the root > 1? then add 1 foreach my $f (keys(%files)) { ($root, $ext) = ($f =~ /(.+?)\.([a-z][a-z0-9.]*)$/i); $root = lc($root); $ext = lc($ext); if ($root eq '' or $ext eq '' ) { print "can't parse $f\n" if ($printdebug > 2); } if (grep(/^$ext$/i, ( @moveThese, @tryTheseToo ) )) { print "promoting for extension $ext\n" if ($printdebug > 2); $files{$f}++; } # if we have a long ext, and the short one exists too... bonus if ($root and $ext and $ext =~ /(.+)\..+/) { $r = $root.'.'; if (defined($files{$r.$1})) { print "promoting for extended extension\n" if ($printdebug > 2); $files{$r.$ext}++; $files{$r.$1}; } } if ($f !~ /[+*{}\/^\(\)]/) #exclude files that include quantifiers { if (grep(/^$f$/i, @copyTheseFiles)) { print "promoting specific file $f\n" if ($printdebug > 2); $files{$f} = 1; } } # siblings have unfair advantages if ($roots{$root} > 1) { print "promoting root $root\n" if ($printdebug > 2); $files{$f}++; } foreach my $n (@promoteFileNames) { if ($root =~ /$n/) { print "promoting file name $root\n" if ($printdebug > 2); $files{$f}++; } } # some files are high-risk - test against this list # if the root exists and > 1, meaning it has a partner, promote # if there's a partner file with the extra extension (like .xml) then promote if ( grep( $ext eq $_, @testThese ) ) { if ($roots{$root} > 1) { $files{$f}++; } foreach my $etc (@appendThese) { if (defined($files{"$f.$etc"})) { $files{$f}++; } } } # demote some files, and files named like them if ( grep( /^$ext$/i, keys(%demoteTheseEtc) ) ) { $files{$f} = 0; @etcs = @{$demoteTheseEtc{$ext}}; foreach my $etc (@etcs) { if (defined($files{"$root.$etc"})) { print "demoting for $etc\n" if ($printdebug > 2); $files{"$root.$etc"} -= 1; } } } } #compute a ratio of files to move or copy, to all files # if ratio > 70%, then add 1 to all files $copiers = grep( $files{$_} >= 1, keys(%files) ); $all = keys(%files); if ($all>0) { $ration = $copiers/$all; if ($ratio > .7) { print "promoting by ratio\n" if ($printdebug > 2); map { $files{$_}++; } keys(%files); } if ($ratio < .2) { print "demoting by ratio\n" if ($printdebug > 2); map { $files{$_}--; } keys(%files); } } # if the directory appears to be something, demote $finalDir = (split(m#/#, $dir))[-1]; if (grep(/$finalDir/i, @demoteIfDirectoryContains)) { print "demoting for dir name\n" if ($printdebug > 2); map { $files{$_}--; } keys(%files); } # if the directory appears to be something, promote $finalDir = (split(m#/#, $dir))[-1]; if (grep(/$finalDir/i, @promoteIfDirectoryContains)) { print "promoting for dir name\n" if ($printdebug > 2); map { $files{$_}++; } keys(%files); } #if any parent directory contains a string, promote foreach my $str (@promoteIfAnyDirectoryContains) { if ( $dir=~ /$str/i ) { map { $files{$_}++; } keys(%files); } } return %files; } # # copy files with a score of 1 # move files with a score of 2 or more # sub analyzeAndMove($) { my $dir = shift @_; my %result = analyze($dir); return if (%result==0); ## do nothing if empty dir my $fm = FileMirror->new( $startDir, $destDir ); my $score; foreach my $key (keys %result) { $score = $result{$key}; if ($score>1) { $fm->move($dir.$key); } elsif ($score>0 && $score<=1) { $fm->copy($dir.$key); } } } #if no files have been marked for moving, continue with nxet directory #prepare a report #all unmarked files get listed first as "not moved" an this report #is saved to the destination - so user there can bring over files #all marked files get listed second, and this report is written to #the source directory #a report of all files, with dispositions marked, is written to the main log #format should be like this; #M G:\foo\bar\x.ini #C G:\foo\bar\x.txt #- G:\foo\bar\y #that way, it's easy to see what hasn't been moved #copy all the files marked for moving or copying #delete all the files marked for moving #continue with next directory