#!/usr/bin/perl -w
# Program:  sizemon.pl
# Author:   Spencer Shimko
# Purpose:  Report directory usage generally, by extension,
#           and common file groups
# Usage:    ./sizemon.pl [-h] [-v] [-e] [dir1 dir2 dir3...]
# Created:  7/01/04
# Modified: 7/01/04
# License:
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#############################################################################

use strict;
use warnings;

use Cwd;
use File::Find;
use File::Spec;
use File::stat;
use Getopt::Std;

############################
#    USER CONFIGURATION    #
############################

# extensions for known file groups
# lowercase only
# regex allowed (so to use a literal dot use \.)
# each entry is matched against the WHOLE extension, so 'c' matches
# only "c" and not "doc" or "cc"
my %groups = (
    "Office related files"     => [ 'doc', 'ppt', 'xls', 'pdf', 'ps', 'pps',
                                    'pab', 'txt' ],
    "Image files"              => [ 'gif' ,'jpg', 'jpeg', 'tif', 'tiff', 'png' ,
                                    'bmp', 'ico', 'jpe' ],
    "Multimedia files (cumm.)" => [ 'mov' , 'mp3', 'wav', 'mpg', 'wmv', 'wma',
                                    'mp2', 'ogg', 'aac', 'ac3', 'avi' , 'mp1',
                                    'divx', 'mp4', 'qt', 'ram', 'rm', 'vob',
                                    'ogm' ],
    "Audio files"              => [ 'mp3', 'wav', 'wma', 'ogg', 'aac', 'ac3',
                                    'mp1', 'mp2', 'ram', 'm4a' ],
    "Video files"              => [ 'mov', 'mpg', 'wmv', 'divx', 'mp4', 'qt',
                                    'rm', 'vob' ],
    "Program sources"          => [ 'pl' , 'cpp', 'cc', 'c', 'java' ],
    "Program executables"      => [ 'exe', 'bat', 'com', 'pif', 'scr', 'jar' ],
    "Web related"              => [ 'html', 'htm', 'xml', 'css', 'swf' ],
    "Installation sources"     => [ '^.{1,3}_$' ],
    "Outlook Personal Folders" => [ 'pst' ],
    "Zips, compressed, packed" => [ 'zip' , 'gz', 'tgz', 'rar', 'rpm' ]
);

############################
#  END USER CONFIGURATION  #
############################

############################
#         GLOBALS          #
############################

# usage string; ends in a newline so die() does not append "at ... line N"
my $usage = "usage: $0 [-h] [-v] [-e] [dir1 dir2 dir3...]\n"
          . "optional flags:\n"
          . "\t-h: this help message\n"
          . "\t-v: verbose output (warning: could be extremely long)\n"
          . "\t-e: display statistics by file extension\n";

# hash of stats accumulated over every directory given on the command line
my %estats;

# no file extension stats
$estats{'???'} = 0;

# verbosity
my $ver = 0;

# if extension stats requested
my $restats = 0;

# unit suffixes used by hSize, smallest first (decimal steps of 1000)
my @SIZE_UNITS = qw(B K M G T);

############################
#       END GLOBALS        #
############################

############################
#    sub parse cmd line    #
############################
# Parse the command line flags out of @ARGV and set $ver / $restats.
# Dies with the usage text on -h or on an unknown flag.  "-r <arg>" is
# still accepted so old invocations keep working, but its value is unused.
sub getcmdline {
    my %opts;
    getopts('hevr:', \%opts) or die $usage;

    # if -h specified display usage and bail
    die $usage if defined $opts{'h'};

    if ( defined $opts{'v'} ) {
        print "Verbose output requested!\n";
        $ver = 1;
    }
    if ( defined $opts{'e'} ) {
        print "File extension stats requested!\n";
        $restats = 1;
    }
    return;
}
#############################
#  end sub parse cmd line   #
#############################

#############################
#  begin file group stats   #
#############################
# Sum the sizes of all extensions that belong to one file group.
#   $groups - array ref of extension patterns (regex allowed)
#   $stats  - hash ref mapping extension => total size in bytes
# Returns the combined size in bytes.  Every pattern must match the whole
# extension, and each extension is counted at most once even when several
# patterns of the group match it.
sub grpStats {
    my ($groups, $stats) = @_;
    my $grpsize = 0;

    # a plain hash lookup would be faster but would not allow regexes in
    # the group extensions, so iteration is required; compile each
    # pattern once up front
    my @patterns = map { my $pat = $_; qr/\A(?:$pat)\z/ } @$groups;

    EXTENSION:
    foreach my $sext ( keys %$stats ) {
        foreach my $re ( @patterns ) {
            if ( $sext =~ $re ) {
                $grpsize += $stats->{$sext};
                next EXTENSION;
            }
        }
    }
    return $grpsize;
}
#############################
#   end file group stats    #
#############################

#############################
#  begin human readable size #
#############################
# Turn a byte count into a short string such as "512B", "1.5K" or "2.5M".
# The value is divided by 1000 per step, truncated (not rounded) to one
# decimal place, and never scaled beyond the largest known unit.
sub hSize {
    my $size = shift;
    $size = 0 unless defined $size;

    my $unit = 0;
    while ( $unit < $#SIZE_UNITS && $size >= 1000 ) {
        $size /= 1000;
        $unit++;
    }
    # keep a single digit after the decimal point
    $size =~ s/^(\d+\.\d).*$/$1/;
    return $size . $SIZE_UNITS[$unit];
}
#############################
#  end human readable size  #
#############################

#############################
#  display extension stats  #
#############################
# Print "ext: size" pairs, sorted by extension, three to a line.
#   $stats - hash ref mapping extension => total size in bytes
sub extStats {
    my $stats = shift;
    my $cnt   = 0;    # used for newlines

    for my $ext ( sort keys %$stats ) {
        $cnt++;
        printf "\t%-5s %s", $ext . ":", hSize( $stats->{$ext} );
        print "\n" if $cnt % 3 == 0;
    }
    # finish a partially filled last row
    print "\n" unless $cnt % 3 == 0;
    return;
}
#############################
#  end disp extension stats #
#############################

#############################
#    display group stats    #
#############################
# Print one line per configured file group that has a non-zero size.
#   $stats - hash ref mapping extension => total size in bytes
sub grpReport {
    my $stats = shift;

    foreach my $name ( sort keys %groups ) {
        my $sz = hSize( grpStats( $groups{$name}, $stats ) );
        printf "\t%-25s %s \n", $name, $sz if $sz ne "0B";
    }
    return;
}
#############################
#   end disp group stats    #
#############################

#############################
#   begin file extension    #
#############################
# Return the lowercased extension of a file name: the 1-4 characters after
# its last dot.  Names without such an extension yield '???'.
sub fileExt {
    my $filename = shift;

    return lc $1 if $filename =~ /\.([^.]{1,4})$/;
    return '???';
}
#############################
#    end file extension     #
#############################

#############################
#        begin main         #
#############################
# Walk every requested directory (default: the current one) and print the
# per-directory report plus, for more than one directory, the grand totals.
sub main {
    getcmdline();

    # create list of parent directories, default to cwd
    my @dirlist = @ARGV ? @ARGV : ( getcwd() );

    # simple stat accumulators
    my $ttlsize  = 0;
    my $ttlfiles = 0;

    # iterate through directory list
    # should save a little memory for large structures
    # and make stats slightly easier
    foreach my $dir ( @dirlist ) {
        unless ( -e $dir ) {
            warn "$0: skipping $dir: no such file or directory\n";
            next;
        }
        print "Entering $dir\n" if $ver;

        # per directory stats
        my $pdirsize  = 0;
        my $pdirfiles = 0;
        my %dlist;
        my %dstats = ( '???' => 0 );

        find( sub {
                  # don't count directory sizes or unreadable files
                  return if -d $_;
                  return unless -r $_;
                  # -s yields a false value for empty files; store 0
                  $dlist{$File::Find::dir}{$_} = ( -s $_ ) || 0;
              }, $dir );

        # iterate through the directory hash of filenames
        foreach my $subdir ( sort keys %dlist ) {
            print "$subdir:\n" if $ver;
            my $dirsize = 0;
            my $filecnt = 0;

            # each filename is associated with a size
            while ( my ( $filename, $filesize ) = each %{ $dlist{$subdir} } ) {
                #print "\t $filename .::. $filesize\n";
                $dirsize += $filesize;
                $filecnt++;
                # gather stats based on extensions ('???' = none)
                $dstats{ fileExt($filename) } += $filesize;
            }
            printf "\tSUBTOTAL: %s in %s files.\n\n", hSize($dirsize), $filecnt
                if $ver;

            $ttlsize   += $dirsize;
            $ttlfiles  += $filecnt;
            $pdirsize  += $dirsize;
            $pdirfiles += $filecnt;
        }

        # fold this directory into the cumulative extension stats
        $estats{$_} += $dstats{$_} for keys %dstats;

        # print some stats per directory; $dir goes through print rather
        # than a printf format so a '%' in a path is shown literally
        print "\n**************************************************************\n";
        print "Directory: $dir\n";
        if ( $restats ) {
            print "File extension stats:\n";
            extStats( \%dstats );
        }
        print "File group stats:\n";
        grpReport( \%dstats );
        printf "Totals: %s in %s files.\n", hSize($pdirsize), $pdirfiles;
        print "**************************************************************\n";
    }

    # display some stats
    if ( @dirlist > 1 ) {
        print "\n==============================================================\n";
        print "Cummulative stats for all directories traversed:\n";
        if ( $restats ) {
            print "Total statistics by file extension (???=None):\n";
            extStats( \%estats );
        }
        print "Total statistics by common file groups:\n";
        grpReport( \%estats );
        printf "Cummulative total: %s in %s files.\n", hSize($ttlsize), $ttlfiles;
        print "============================================================\n";
    }
    return;
}

# run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without scanning the file system
main() unless caller;

1;
#############################
#         end main          #
#############################