#!/usr/bin/perl -w
# VIBE version 0.5
# A recursive file analyzer
#
# Typical use: run once with -L to record every file below $treeroot in
# $logfile, then run again with any of the report flags to produce an HTML
# report in $outfile.
#
# Flags (case-insensitive, may be combined):
#   -L   Log the contents of the file system below $treeroot, then exit.
#   -C   Compare the file system to the log to check for variances
#        (vibrations).
#   -DS  Directory sizes: local and recursive size of every logged directory
#        (where did our storage space go?).
#   -FT  File threshold: every logged file larger than $filethreshold.
#   -DT  Directory threshold: every directory larger than $dirthreshold.
#   -TF  Top files: the $topdisplaynum largest logged files, ranked.
#   -TD  Top directories: the $topdisplaynum largest directories, ranked.
#
# Ideas that are NOT implemented yet:
#   -Z   Zero directory size check (find empty directories that do nothing).
#   -H   Help. Give an explanation of all these things.
#   File type distributions ("80% .mp3, 10% .jpg...").
#   Ranking by local directory size, with size and % of total per entry.
#   HOME directory analysis.

use strict;
use warnings;

#Starting directory:
my $treeroot = '/home/leiavoia';

#name of logfile:
my $logfile = 'vibe.log';

#name of outfile:
my $outfile = 'Vibe_Results.html';

#Files to exclude. Files are NOT full path names:
my @excludefiles = ('.', '..', $logfile, $outfile);

#display X number of "Top Files":
my $topdisplaynum = 100;

#File Threshold (in bytes):
my $filethreshold = 2000000;    # 2 MB

#Directory Threshold (in bytes):
my $dirthreshold = 30000000;    # 30 MB

#=============NO MORE USER CONFIGURATION============#

# Flags given on the command line, upper-cased: flag => 1.
my %functions = ();

# Files found by the live scan:  full path => [ size, -C age, -M age ].
my %masterlist = ();

# Files read back from $logfile: full path => [ size, -C age, -M age ].
my %loglist = ();

# Per-directory totals, filled by DirSizes:
# dir => [ levels deep, local size, recursive size ].
my %dirsizes = ();

# Every directory that (directly or indirectly) holds a logged file,
# sorted deepest first by DirSizes.
my @uniquedirs = ();

# File counters:
my $numfiles  = 0;    # files seen so far by the live scan
my $expectnum = 0;    # files listed in the log

# Handle of the HTML report; only open while a report is being written.
my $out;

# Indentation used in front of detail lines in the report.
my $INDENT = '&nbsp;' x 5;

# Run the program only when executed as a script, so the subroutines can be
# loaded from another file without side effects.
main() unless caller;

########### MAIN SEQUENCE ##########
# Parses the flags, then logs (-L) or builds the requested report sections.
sub main {
    unless (@ARGV) {
        PrintUsage();
        exit;
    }

    #put the flags into a hash for easier reference:
    %functions = map { uc($_) => 1 } @ARGV;

    #every mode except logging works from the previous log:
    BuildLogList() unless $functions{'-L'};

    if ($functions{'-C'} or $functions{'-L'}) {
        print "[SCANNING FILE SYSTEM]\n";
        my $expected = $expectnum == 0 ? '?' : $expectnum;
        print "Expecting: $expected \n";
        print "Files Scanned: ";
        ReadDir();
        print "\n";
    }

    if ($functions{'-L'}) {
        PrintLogToFile();
        print "[PROGRAM COMPLETED]\n\n";
        exit;
    }

    open($out, '>', $outfile)
        or die "Cannot write report '$outfile': $!\n";
    print {$out} "<html>\n<head><title>VIBE Results</title></head>\n<body>\n";

    Compare() if $functions{'-C'};

    DirSizes();
    PrintDirSizes() if $functions{'-DS'};
    FileThreshold() if $functions{'-FT'};
    DirThreshold()  if $functions{'-DT'};
    TopFiles()      if $functions{'-TF'};
    TopDirs()       if $functions{'-TD'};

    print {$out} "</body>\n</html>\n";
    close($out) or die "Cannot finish report '$outfile': $!\n";

    print "[PROGRAM COMPLETED]\n\n";
    exit;
}
########### MAIN SEQUENCE ##########

#-----------------------------------------------------
#Explain how to run the program (shown when no flag is given).
sub PrintUsage {
    print "VIBE requires that you first log your filesystem. If this is the first time running vibe, configure the variables in the script itself and then run the scrip the the -L flag.\n\n";
    print "After running VIBE in log mode, you may then run VIBE again with any or all of the following flags. It outputs an HTML file.\n";
    print "-C Comparison Mode. This compares your current files with your previous log.\n";
    print "-DS Directory Sizes Comparisons. Calculates the recursive total size of all directories in your specified search area\n";
    print "-FT File Thresholds. Shows you all the files that are larger than a certain threshhold.\n";
    print "-DT Directory Thresholds. Shows you all the directories that are larger than a certain threshhold.\n";
    print "-TF Top Files. Shows you the largest files in your scanned area, ranked.\n";
    print "-TD Top Directories. Shows you the largest directories in your scanned area, ranked.\n";
    print "\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Build the comparison list (%loglist) from the logfile.
#Each log line is "path<TAB>size<TAB>-C age<TAB>-M age"; $expectnum is
#raised by one for every entry read. Dies when the log cannot be read,
#because every report depends on it.
sub BuildLogList {
    print "Building logged comparison list...\n";

    #global Loglist. [0] = file size, [1] = -C age, [2] = -M age
    %loglist = ();

    open(my $log, '<', $logfile)
        or die "Cannot read log '$logfile': $! (run VIBE with -L first)\n";
    while (my $line = <$log>) {
        chomp $line;
        my ($path, $size, $cage, $mage) = split /\t/, $line;
        next unless defined $path and length $path;    #skip blank lines

        #a blank size means "empty file" in logs written without a number:
        $size = 0 unless defined $size and length $size;

        $loglist{$path} = [ $size, $cage, $mage ];
        $expectnum++;
    }
    close $log;
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Engine for scanning dirs. Walks the tree below the given directory
#(default: $treeroot) and records every plain file in %masterlist as
#[ size, -C age, -M age ], updating the on-screen counter as it goes.
#Uses an explicit work list instead of recursion, so very deep trees do
#not nest calls.
sub ReadDir {
    my @pending = (@_ ? $_[0] : $treeroot);

    local $| = 1;    #show the running counter immediately

    while (@pending) {
        my $thisdir = pop @pending;

        #get the current dir's contents:
        my $dh;
        unless (opendir($dh, $thisdir)) {
            warn "\nCannot read directory '$thisdir': $!\n";
            next;
        }
        my @thisdirinfo = readdir $dh;
        closedir $dh;

        foreach my $entry (@thisdirinfo) {
            next if Exclude($entry);    #skip if it matches an exclude file

            my $path       = "$thisdir/$entry";
            my $is_symlink = -l $path;

            #stat follows links and primes "_" for the tests below;
            #it fails for broken links and entries that just vanished.
            next unless stat $path;

            if (-d _) {
                #never descend through a symlink: the target is either
                #scanned under its real name or outside the tree, and a
                #link pointing upwards would loop forever.
                push @pending, $path unless $is_symlink;
                next;
            }
            next unless -f _;

            #-s yields an empty string for empty files; log a real 0.
            $masterlist{$path} = [ (-s _) || 0, (-C _), (-M _) ];

            #delete and update the running counter here
            #(nothing to erase before the first number):
            print "\b" x length($numfiles) if $numfiles > 0;
            print ++$numfiles;
        }
    }
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Compare the logged list with the live scan and write the differences
#(deleted, resized and newly created files) to the report.
#Date-based checks (recreated / modified without a size change) were
#disabled in this version and are not performed.
sub Compare {
    print "[COMPARING FILES]\n";
    print {$out} "<h2>FILE VIBRATIONS</h2>\n";

    foreach my $path (sort keys %loglist) {
        my $name = EscapeHtml($path);

        if (not exists $masterlist{$path}) {
            print {$out} "<b>$name</b> appears to have been deleted<br>\n<br>\n";
            next;
        }

        my $is  = $masterlist{$path}[0];
        my $was = $loglist{$path}[0];
        next if $is == $was;

        my $dif = $is - $was;
        print {$out} "<b>$name</b> was modified and has a different file size:<br>\n";
        print {$out} "${INDENT}Size is: $is";
        print {$out} "${INDENT}Size was: $was";
        print {$out} "${INDENT}Change: $dif<br>\n<br>\n";
    }

    #whatever the scan found that the log does not know is new:
    foreach my $path (sort grep { !exists $loglist{$_} } keys %masterlist) {
        my $name = EscapeHtml($path);
        print {$out} "<b>$name</b> was newly created<br>\n<br>\n";
    }

    print {$out} "<hr>\n";
    print "Comparison results logged to $outfile.\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Plot each directory and compute its size from the logged files.
#Fills %dirsizes ([0] = levels deep, [1] = local dir size, [2] = recursive
#total dir size) and leaves @uniquedirs sorted deepest first.
sub DirSizes {
    print "[COMPUTING DIRECTORY SIZES]\n";
    %dirsizes = ();

    #find the unique folders:
    print "Finding unique directories...\n";
    @uniquedirs = GetUniqueDirs();

    #depth = number of slashes in the directory name
    foreach my $dir (@uniquedirs) {
        my $depth = ($dir =~ tr{/}{});
        $dirsizes{$dir} = [ $depth, 0, 0 ];
    }

    #sort once by depth, deepest first, so that every directory is complete
    #before its total is passed up to its parent:
    @uniquedirs = sort { $dirsizes{$b}[0] <=> $dirsizes{$a}[0] } @uniquedirs;

    #add every file to the local size of the directory holding it:
    print "Calculating sizes...\n";
    foreach my $file (keys %loglist) {
        my $dir = _ParentPath($file);
        next unless exists $dirsizes{$dir};    #outside the surveyed tree
        $dirsizes{$dir}[1] += $loglist{$file}[0];
    }

    #the recursive total starts out as the local total...
    $dirsizes{$_}[2] = $dirsizes{$_}[1] foreach @uniquedirs;

    #...and then travels up the tree. The parent of the start directory is
    #not tracked, so nothing is created for it.
    foreach my $dir (@uniquedirs) {
        my $parent = _ParentPath($dir);
        next unless exists $dirsizes{$parent};
        $dirsizes{$parent}[2] += $dirsizes{$dir}[2];
    }
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Write the size table of all directories to the report and print the
#summary (number of directories, total logged bytes) to the screen.
sub PrintDirSizes {
    print "Size comparisons complete.\nNow writing results to $outfile...\n\n";

    print {$out} "<h2>DIRECTORIES</h2>\n";
    print {$out} "<table border=\"1\" cellpadding=\"4\">\n";
    foreach my $dir (@uniquedirs) {
        my ($depth, $local, $recursive) = @{ $dirsizes{$dir} };

        #get display sizes:
        my $displocalsize = DisplaySize($local);
        my $disprecsize   = DisplaySize($recursive);
        my $name          = EscapeHtml($dir);

        print {$out} "<tr><td colspan=\"3\"><b>$name</b></td></tr>\n";
        print {$out} "<tr><td>Local Size: $displocalsize ($local bytes)</td>";
        print {$out} "<td>Recursive Size: $disprecsize ($recursive bytes)</td>";
        print {$out} "<td>Levels Deep: $depth</td></tr>\n";
    }
    print {$out} "</table>\n";

    my $dircount   = scalar @uniquedirs;
    my $totalbytes = 0;
    $totalbytes += $loglist{$_}[0] foreach keys %loglist;
    my $totalmb = DisplaySize($totalbytes);

    print "$dircount unique directories analyzed\n";
    print "$totalmb survey-wide disk space consumption\n\n";

    print {$out} "<hr>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Report the $topdisplaynum largest logged files, largest first.
sub TopFiles {
    print {$out} "<h2>TOP $topdisplaynum FILES</h2>\n";
    my @files = sort { $loglist{$b}[0] <=> $loglist{$a}[0] or $a cmp $b }
        keys %loglist;
    _PrintRanked(\@files, sub { $loglist{ $_[0] }[0] });
    print {$out} "<hr>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Report every logged file larger than $filethreshold, largest first.
sub FileThreshold {
    my $threshdispsize = DisplaySize($filethreshold);
    print {$out} "<h2>FILES OVER $threshdispsize ($filethreshold bytes)</h2>\n";

    my @files = sort { $loglist{$b}[0] <=> $loglist{$a}[0] or $a cmp $b }
        grep { $loglist{$_}[0] > $filethreshold } keys %loglist;
    _PrintSizedEntry(EscapeHtml($_), $loglist{$_}[0]) foreach @files;

    print {$out} "<hr>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Report the $topdisplaynum largest directories (recursive size).
sub TopDirs {
    print {$out} "<h2>TOP $topdisplaynum DIRECTORIES</h2>\n";
    my @dirs = sort { $dirsizes{$b}[2] <=> $dirsizes{$a}[2] or $a cmp $b }
        @uniquedirs;
    _PrintRanked(\@dirs, sub { $dirsizes{ $_[0] }[2] });
    print {$out} "<hr>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Report every directory whose recursive size exceeds $dirthreshold.
sub DirThreshold {
    my $threshdispsize = DisplaySize($dirthreshold);
    print {$out} "<h2>DIRECTORIES OVER $threshdispsize ($dirthreshold bytes)</h2>\n";

    my @dirs = sort { $dirsizes{$b}[2] <=> $dirsizes{$a}[2] or $a cmp $b }
        grep { $dirsizes{$_}[2] > $dirthreshold } @uniquedirs;
    _PrintSizedEntry(EscapeHtml($_), $dirsizes{$_}[2]) foreach @dirs;

    print {$out} "<hr>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#print the whole mess to file if we are logging:
#each entry (file) on a line, tab delimited: path, size, -C age, -M age.
sub PrintLogToFile {
    print "Now writting log to disk...\n";
    open(my $log, '>', $logfile)
        or die "Cannot write log '$logfile': $!\n";
    foreach my $path (keys %masterlist) {
        print {$log} join("\t", $path, @{ $masterlist{$path} }[0 .. 2]), "\n";
    }
    #buffered write errors only show up here:
    close($log) or die "Cannot finish log '$logfile': $!\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Return every directory that holds a logged file, plus all of its parents
#down to (and including) the depth of $treeroot. Parents are listed too so
#that the "passing up" of sizes works for dirs without files of their own.
#Only %loglist is consulted.
sub GetUniqueDirs {
    my %seen;
    my @unique;

    #find the levels deep of the specified start directory
    my $startdeep = ($treeroot =~ tr{/}{});

    foreach my $path (keys %loglist) {
        my @parts = split m{/}, $path;

        #do all parent dirs, but don't pass our start point.
        while ($#parts > $startdeep) {
            pop @parts;    #first pass hacks off the file name
            my $dir = join '/', @parts;

            #a known dir had all of its parents registered with it
            last if $seen{$dir}++;
            push @unique, $dir;
        }
    }
    return @unique;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Turn a byte count into a short human-readable string using decimal
#units: "2.00 Gb", "1.50 Mb", "1.00 Kb", or the plain number below 1000.
sub DisplaySize {
    my ($bytes) = @_;
    $bytes = 0 unless defined $bytes;

    return sprintf("%.2f Gb", $bytes / 1000000000) if $bytes > 999999999;
    return sprintf("%.2f Mb", $bytes / 1000000)    if $bytes > 999999;
    return sprintf("%.2f Kb", $bytes / 1000)       if $bytes > 999;
    return sprintf("%d", $bytes);
}
#-----------------------------------------------------

#-----------------------------------------------------
#Exclude: returns 1 if the given bare file name is in @excludefiles,
#0 if not.
sub Exclude {
    my ($name) = @_;
    return( (grep { $_ eq $name } @excludefiles) ? 1 : 0 );
}
#-----------------------------------------------------

#-----------------------------------------------------
#Make a string safe for use as HTML text (file names may contain
#"<", ">", "&" or quotes).
sub EscapeHtml {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Return the path with its last "/"-separated component hacked off
#('' when nothing is left).
sub _ParentPath {
    my ($path) = @_;
    my @parts = split m{/}, $path;
    pop @parts;
    return join '/', @parts;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Write one report entry: the (already escaped) label on one line and its
#size, pretty and in bytes, on the next.
sub _PrintSizedEntry {
    my ($label, $bytes) = @_;
    my $dispsize = DisplaySize($bytes);
    print {$out} "$label<br>\n";
    print {$out} "$INDENT$dispsize ($bytes bytes)<br>\n<br>\n";
    return;
}
#-----------------------------------------------------

#-----------------------------------------------------
#Write at most $topdisplaynum numbered entries from an already sorted
#list of names; $size_of->($name) supplies the size of each.
sub _PrintRanked {
    my ($names, $size_of) = @_;
    my $rank = 0;
    foreach my $name (@$names) {
        last if $rank >= $topdisplaynum;
        $rank++;
        _PrintSizedEntry("$rank) " . EscapeHtml($name), $size_of->($name));
    }
    return;
}
#-----------------------------------------------------