#!/usr/local/bin/perl
# dups.pl
#
# Find identical files by comparing MD5 digests of their contents.
#
# Arguments beginning with "-" are bundles of single-letter options; every
# other argument names a file or directory to check.  With no file or
# directory arguments the current directory is scanned.  Run with -h for the
# full option list (the usage text lives in the __DATA__ section below).

use Digest::MD5;
use strict;
use warnings;

# Maps a base64 MD5 digest to an array ref holding the names of all files
# whose contents produced that digest.
my %files = ();

# Work queue of file and directory names that still have to be examined.
my @todo = ();

my ($include_empty, $include_hidden, $recurse, $show_dups, $show_uniques,
    $show_usage, $verbose);

# --- Command line -----------------------------------------------------------

OPT: for my $arg (@ARGV) {
    my $flags = $arg;

    if ($flags =~ s/^\-//) {
        # Each recognised letter is removed as it is handled; whatever is
        # left over afterwards is an unknown option.
        $include_hidden = 1 if $flags =~ s/a//;
        $show_usage     = 1 if $flags =~ s/h//;
        $show_dups      = 1 if $flags =~ s/p//;
        $recurse        = 1 if $flags =~ s/r//;
        $show_uniques   = 1 if $flags =~ s/u//;
        $verbose        = 1 if $flags =~ s/v//;
        $include_empty  = 1 if $flags =~ s/z//;

        # Test the length rather than the truth value so that a leftover
        # "0" (from "-0") is reported like any other unknown option.
        if (length $flags) {
            print STDERR "Unknown option(s): $flags\n";
            $show_usage = 1;
        }
        next OPT;
    }

    # This must be the name of a file or directory to check.  A trailing
    # slash marks a directory that was named explicitly, so the main loop
    # scans it even when -r was not given.
    my $name = $arg;
    $name .= '/' if (-d $name) && $name !~ m!/$!;
    push @todo, $name;
}

if ($show_usage) {
    # Print the usage text, substituting the name this script was run as.
    while (<DATA>) {
        s/%CMD%/$0/g;
        print;
    }
    exit;
}

@todo = ('./') unless @todo;
$show_dups = 1 unless $show_uniques;

# --- Scan -------------------------------------------------------------------

FILE: while (@todo) {
    my $filename = shift @todo;

    if (-d $filename) {
        # A directory is scanned if it was named on the command line (it
        # still carries its trailing slash, removed here) or if -r is on.
        if ($filename =~ s!/$!! || $recurse) {
            print STDERR "scanning $filename\n" if $verbose;
            push @todo, list_directory($filename, $include_hidden);
        }
        next FILE;
    }

    next FILE if -z $filename && ! $include_empty;

    # Three-argument open with a lexical handle: the name is always taken
    # literally, so a file called ">x" or "cmd |" cannot truncate a file or
    # start a command.
    my $in;
    if (! open($in, '<', $filename)) {
        warn "Cannot open $filename: $!\n";
        next FILE;
    }
    binmode($in);    # digest the raw bytes on every platform

    print STDERR "reading $filename\n" if $verbose;
    my $digest = Digest::MD5->new->addfile($in)->b64digest;
    close($in);

    push @{ $files{$digest} }, $filename;
}

# --- Report -----------------------------------------------------------------

# Sort the names inside each group, then order the groups by their first
# name, so the output does not depend on the order files were discovered in.
my @groups = sort { $a->[0] cmp $b->[0] }
             map  { [ sort @{$_} ] } values %files;

GROUP: for my $group (@groups) {
    my ($first, @others) = @{$group};

    if (! @others) {
        print $first, "\n" if $show_uniques;
        next GROUP;
    }

    next GROUP unless $show_dups;
    for my $other (@others) {
        printf "%s == %s\n", $first, $other;
    }
}

# --- Helpers ----------------------------------------------------------------

# list_directory($dir, $with_hidden)
#
# Returns the entries of directory $dir, each prefixed with "$dir/".  Entries
# whose names begin with a period are returned only when $with_hidden is
# true, and then after all other entries; "." and ".." are never returned.
# Each of the two groups is sorted by name.  If the directory cannot be read
# a warning is printed and an empty list is returned.
#
# The directory is read with opendir/readdir rather than glob so that
# whitespace and glob metacharacters in $dir are taken literally.
sub list_directory {
    my ($dir, $with_hidden) = @_;

    # An empty name is what is left of "/" after its trailing slash has
    # been stripped by the caller.
    my $path = length $dir ? $dir : '/';

    my $dh;
    if (! opendir($dh, $path)) {
        warn "Cannot read directory $path: $!\n";
        return;
    }
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir($dh);
    closedir($dh);

    my @visible = grep { ! /^\./ } @entries;
    my @hidden  = $with_hidden ? (grep { /^\./ } @entries) : ();

    return map { "$dir/$_" } (@visible, @hidden);
}

__DATA__
Usage: %CMD% [OPTION]... [FILE_OR_DIR]...
Find identical files. If no options are given, assume -p.

  -a  Include entries beginning with a period (.).
  -h  Show usage and exit.
  -p  Print out pairs of identical files ("file1 == file2").
  -r  Recurse subdirectories.
  -u  Print out names of unique files.
  -v  Verbose mode: print progress to stderr.
  -z  Include empty (zero-length) files.

Version 1.0; 2007-09-13. Copyright (c) 2005--07, Wastholm Media.
Please see http://www.wastholm.com/ for more information.