# FindDuplicates - find (possible) duplicate files
#
# Walks one or more directory trees and groups every plain file by a cheap
# fingerprint: its size in bytes plus a checksum of its first 1000 bytes.
# Files that share a fingerprint are *candidates* for being duplicates;
# the fingerprint is deliberately cheap and does not prove the contents
# are identical.
#
# usage:   FindDuplicates [root ...]
#          With no arguments the tree "MyDisk:FileTree" is scanned.
#
# prints:  MyDisk:FileTree:foo (123,456)
#          MyDisk:FileTree:bar (123,456)
#          ...
#          (one group per fingerprint, groups separated by a blank line)

use strict;
use warnings;
use File::Find;

# Package-level tables filled in by makeFileKey(), keyed by "size,checksum".
our %cnt;    # number of files seen with this key
our %out;    # newline-terminated "path (key)" lines for this key

# Roots to scan: command-line arguments if given, else the historical default.
my @roots = @ARGV ? @ARGV : ('MyDisk:FileTree');

find(\&makeFileKey, @roots);    # collect info

# Print results: only keys shared by more than one file are interesting.
# Each $out{$key} already ends in "\n", so the extra "\n" here yields the
# blank line that separates one group from the next.
foreach my $key (sort keys %out) {
    print "$out{$key}\n" if $cnt{$key} > 1;
}

# makeFileKey - File::Find "wanted" callback.
#
# Input:   $_                 name of the current entry (File::Find has
#                             already chdir'ed into its directory)
#          $File::Find::name  full path of the current entry
# Effect:  for a plain file, increments $cnt{"size,checksum"} and appends
#          "path (size,checksum)\n" to $out{"size,checksum"}.
#          Non-files are ignored. Files that cannot be opened or read are
#          reported on STDERR and skipped, so they never inherit data left
#          over from a previously examined file.
# Returns: nothing useful (File::Find ignores the return value).
sub makeFileKey {
    return unless -f $_;    # is it a plain file?

    # "_" reuses the stat done by -f above. -s yields an empty string for
    # a zero-length file, so normalise that to 0 to keep keys well-formed.
    my $size = (-s _) || 0;

    # Three-argument open: the name is never interpreted as a mode or pipe.
    my $fh;
    unless (open($fh, '<', $_)) {
        warn "FindDuplicates: cannot open $File::Find::name: $!\n";
        return;
    }
    binmode($fh);    # checksum raw bytes, no line-ending translation

    # Only the first 1000 bytes are checksummed, to keep the scan fast.
    my $head     = '';
    my $got      = read($fh, $head, 1000);
    my $read_err = $!;    # capture before close() can overwrite $!
    close($fh);

    unless (defined $got) {
        warn "FindDuplicates: cannot read $File::Find::name: $read_err\n";
        return;
    }

    # 32-bit sum of the byte values, folded into a small range.
    my $sum = unpack('%32C*', $head) % 32767;

    my $key = "$size,$sum";    # build the fingerprint key
    $cnt{$key}++;
    $out{$key} .= "$File::Find::name ($key)\n";
    return;
}