/home/wpollock1/public_html/ShScript/find-dups.sh
#!/bin/sh -
# This script compares the MD5 checksum for all plain files
# in the current directory, and reports any duplicates found.
# (Of course, hard links will be reported as duplicates too,
# as will all zero length files.)
# This works by creating an array "md5", indexed by the MD5 sum,
# and containing the filename. Since not all shells support
# arrays, dynamically named variables are used for the same effect.
#
# This highly portable script was adapted from one by
# Peter Seebach in his book "Beginning Portable Shell Scripting"
# (C) 2008 by Apress, page 113.
# Adapted 11/2009 by Wayne Pollock, Tampa Florida USA
# $Id: find-dups.sh,v 1.0 2009/11/11 19:09:27 wpollock Exp $
#
# TODO: Add command line argument of directory to process,
# A recursive option,
# a help option
# Set PATH to find all POSIX utilities.  The current PATH is kept if
# getconf fails or prints nothing, so a broken getconf cannot leave
# the script with an empty search path:
if posix_path=$(getconf PATH) && test -n "$posix_path"
then
    PATH=$posix_path
fi

# Initialize MD5SUM if not already set (quoted so the expansion is
# never subject to word splitting or pathname expansion):
: "${MD5SUM=md5sum}"

# find_dups
#   Report plain files in the current directory (no recursion) whose
#   checksum equals that of a file seen earlier in the scan.
#
#   Environment: MD5SUM - checksum command; it is given the file on
#                standard input and must print the checksum as the
#                first whitespace-separated field of its output.
#   Output:      one ' Duplicates: "<file>" and "<earlier file>"' line
#                per duplicate on stdout; diagnostics on stderr.
#
#   The body is a subshell "( ... )" so the dynamically named md5_<sum>
#   variables never leak into (or linger in) the calling shell.
#   Limitation: find's output is read line by line, so file names that
#   contain a newline are not supported.
find_dups() (
    # Only find plain files, and only in the current directory:
    find . ! -name . -prune -type f -print | while IFS= read -r FILE
    do
        # "read -r" keeps backslashes in names intact.  The file is fed
        # on stdin so the tool never echoes (and possibly escapes) the
        # file name, which would otherwise corrupt the first field.
        sum=$("$MD5SUM" < "$FILE" | awk '{print $1}')

        # $sum becomes part of a variable name passed to eval, so insist
        # on a non-empty, purely hexadecimal value.  An empty sum (file
        # unreadable, tool missing) must not make unrelated files look
        # like duplicates of each other.
        case $sum in
        ''|*[!0123456789abcdefABCDEF]*)
            printf 'Skipping "%s": no usable checksum\n' "$FILE" >&2
            continue
            ;;
        esac

        # Make a reference to the array element md5_$sum:
        eval "assoc=\$md5_$sum"

        # Check if a previous file with this sum was found:
        if test -z "$assoc"
        then
            # Put filename into array element md5_$sum:
            eval "md5_$sum=\$FILE"
        else
            # Display current and previous file names with same MD5 sum:
            printf ' Duplicates: "%s" and "%s"\n' "$FILE" "$assoc"
        fi
    done
)

find_dups