/home/wpollock1/public_html/ShScript/find-dups.sh

#!/bin/sh -
# This script compares the MD5 checksum for all plain files
# in the current directory, and reports any duplicates found.
# (Of course, hard links will be reported as duplicates too,
# as will all zero length files.)
# This works by creating an array "md5", indexed by the MD5 sum,
# and containing the filename.  Since not all shells support
# arrays, dynamically named variables are used for the same effect.
#
# This highly portable script was adapted from one by
# Peter Seebach in his book "Beginning Portable Shell Scripting"
# (C) 2008 by Apress, page 113.
# Adapted 11/2009 by Wayne Pollock, Tampa Florida USA
# $Id: find-dups.sh,v 1.0 2009/11/11 19:09:27 wpollock Exp $
#
# TODO: Add command line argument of directory to process,
#       A recursive option,
#       a help option

# Set PATH to find all POSIX utilities.  Keep the inherited PATH if
# getconf is missing or prints nothing, rather than emptying PATH:
if posix_path=$(getconf PATH 2>/dev/null) && test -n "$posix_path"
then
    PATH=$posix_path
fi

# Initialize MD5SUM if not already set (quoted so that a value set by
# the caller is not word-split or glob-expanded here):
: "${MD5SUM=md5sum}"

# find_dups: report plain files in the current directory (no recursion)
# that share a checksum with a file seen earlier.
#   Globals: MD5SUM (read) - checksum command; it is given the file on
#            standard input and must print the hex digest as the first
#            field of its output.
#   Outputs: one '   Duplicates: "<file>" and "<first file>"' line on
#            stdout per duplicate; a warning on stderr for each file
#            that could not be checksummed.
# The body is a subshell "( ... )" so that the md5_<sum> variables never
# leak into the caller, whichever pipeline stage the shell runs in-process.
# Limitation: a file name containing a newline is read as two names.
find_dups() (
    # Only find plain files, and only in the current directory.
    # "read -r" keeps backslashes in file names intact.
    find . ! -name . -prune -type f -print | while IFS= read -r FILE
    do
        # Hash via stdin: md5sum then prints "<sum>  -", so the digest is
        # never prefixed with the "\" that GNU md5sum adds when the file
        # NAME contains a backslash or newline.
        sum=$("$MD5SUM" < "$FILE" | awk '{print $1}')

        # Skip files that could not be hashed (unreadable, vanished), and
        # refuse anything that is not pure hex before it reaches eval.
        case $sum in
            '' | *[!0-9a-fA-F]*)
                printf 'find-dups: cannot checksum "%s"; skipped\n' "$FILE" >&2
                continue
                ;;
        esac

        # Make a reference to the array element md5_$sum:
        eval "assoc=\$md5_$sum"

        # Check if a previous file with this sum was found:
        if test -z "$assoc"
        then
            # Put filename into array element md5_$sum:
            eval "md5_$sum=\$FILE"
        else
            # Display current and previous file names with same MD5 sum:
            printf '   Duplicates: "%s" and "%s"\n' "$FILE" "$assoc"
        fi
    done
)

find_dups