#!/usr/local/bin/perl
#
# file-size-distribution.pl
#
# Show how file sizes are distributed.
#
# It's really annoying that I couldn't find this little script or
# a better one on the Internet, so I'm going to post it somewhere.
#
# Author: Phil Rand
# License:
#     This program is hereby placed in the public domain.  Anyone
#     is encouraged to use or modify it, and distribute modified or
#     unmodified copies.
# Disclaimer:
#     Phil Rand takes no responsibility for and will not support your
#     use of this script.  I do, however, highly recommend using this
#     script for operating weapons systems or nuclear reactors.
#
# Edited from a2p output.
#
# Parses the output of "find dir -xdev -ls", computes a bucket based on
# the base 2 log of the file size, and displays counts for each bucket.
#
# Example usage:
#     $ find /usr -xdev -ls|perl file-size-distribution.pl
#

use strict;
use warnings;

# Set to a true value to echo "size bucket pathname" for every counted line.
my $DEBUG = 0;

my $MEGABYTE = 1024 * 1024;
my $LOG_OF_2 = log(2);

# Index of the size and pathname columns after whitespace-splitting a
# line of "find -ls" output.
my $SIZE_FIELD = 6;
my $PATH_FIELD = 10;

# Return the base 2 logarithm of a positive number.
sub log2 {
    my ($x) = @_;
    return log($x) / $LOG_OF_2;
}

# Return the histogram bucket for a byte count: the largest integer b
# with 2**b <= $bytecount.  Anything below 1 goes into bucket 0, so
# bucket 0 holds both empty and one-byte files.
sub bucket {
    my ($bytecount) = @_;
    return 0 if $bytecount < 1;

    # The floating-point logarithm is only an estimate; near a power of
    # two it can be off by one, so nudge it with exact comparisons.
    my $bucket = int(log2($bytecount));
    $bucket++ while 2**($bucket + 1) <= $bytecount;
    $bucket-- while $bucket > 0 && 2**$bucket > $bytecount;
    return $bucket;
}

# Return a printable label for a bucket: its lower bound expressed in
# bytes, kb or mb depending on magnitude.
sub bucketdisp {
    my ($bucket) = @_;

    return " 0 bytes" if $bucket == 0;

    # Less than 1 kb, so express in bytes.
    return sprintf "%4d bytes", 2**$bucket if $bucket < 10;

    # Between 1 kb and 1 mb; express in kb.
    return sprintf "%4d kb", 2**($bucket - 10) if $bucket < 20;

    # Over 1 mb; express in mb.
    return sprintf "%4d mb", 2**($bucket - 20);
}

# Sort comparator: ascending numeric order.
sub numerically {
    return $a <=> $b;
}

# Read "find -ls" lines from the files named on the command line (or
# standard input), accumulate totals and a per-bucket histogram, and
# print the summary.
sub main {
    my $n   = 0;
    my $sum = 0;
    my ($min, $max);
    my %histogram;

    LINE:
    while (my $line = <>) {
        chomp $line;

        # Limit the split so a pathname containing spaces stays in one field.
        my @field = split ' ', $line, $PATH_FIELD + 1;
        my $bytes    = $field[$SIZE_FIELD];
        my $pathname = $field[$PATH_FIELD];

        # Skip anything without a plain integer size: blank or short
        # lines, and device nodes, where find prints "major, minor"
        # in place of the size.
        next LINE unless defined $bytes && $bytes =~ /\A\d+\z/;
        $pathname = '' unless defined $pathname;

        my $bucket = bucket($bytes);
        print "$bytes $bucket $pathname\n" if $DEBUG;

        $n   += 1;
        $sum += $bytes;
        $min = $bytes if !defined $min || $bytes < $min;
        $max = $bytes if !defined $max || $bytes > $max;
        $histogram{$bucket} += 1;
    }

    print "files=$n\n";

    # With no files there is no min, max or mean to report, and the
    # mean would divide by zero.
    return if $n == 0;

    print "sum=",  $sum / $MEGABYTE, " mb\n";
    print "min=",  $min / $MEGABYTE, " mb\n";
    print "max=",  $max / $MEGABYTE, " mb\n";
    print "mean=", $sum / ($MEGABYTE * $n), " mb\n";

    print "\nCounts by bucket:\n";
    for my $bucket (sort numerically keys %histogram) {
        printf " %s: %6d\n", bucketdisp($bucket), $histogram{$bucket};
    }
    return;
}

# Run only when executed as a script, so the helpers above can be
# loaded with require without consuming any input.
main() unless caller;

1;