/home/wpollock1/public_html/AJava/NatOrderComparator/NatOrderComparator.java

package com.wpollock.natordercomp;

import java.math.BigInteger;
import java.util.Comparator;

/**
 * A natural order String Comparator.
 * Natural order means to compare Strings character for character,
 * except when there are runs of digits in the String.  Each run of
 * digits is compared numerically, not lexically (so "2" < "10").
 * <p>
 * If two runs of digits compare equal numerically, the shorter number
 * is less than the longer one (so "3" < "03").
 * <p>
 * The general algorithm is inspired by
 * <a href="https://en.wikipedia.org/wiki/Natural_sort_order">Natural
 * Sort Order</a> on Wikipedia.
 * Compare two Strings codepoint by codepoint, unless a digit is
 * encountered.  If the other String's codepoint is not a digit,
 * the String with the digit is less.
 * If both are numeric, for each String read all the digits and convert to
 * a number.  The smaller value is less; if values
 * are the same, shorter length is less (to account for leading zeroes). If
 * the numbers are the same, continue with the next codepoint  If everything
 * is the same, the shorter String is less.  Otherwise, they are equal.
 * <p>
 * For this version, no options are provided (such as ignoring leading zeros
 * or spaces, case-insensitive matching, or Unicode normalization).  All
 * such steps are assumed to be done by the caller.
 *
 * @version 1.0 (12/30/2015)
 * @author Wayne Pollock, Tampa Florida USA
 *
 */

public class NatOrderComparator implements Comparator<String> {

    /*
     * While the code strives for a clean, elegant solution, efficiency in a
     * Comparator is important, so this version avoids using regular expressions
     * and tries to avoid creating unnecessary objects.
     */

    // Someday, there should be versions with args for case-insensitive,
    // ignore whitespace, and Unicode normalization & sanitization.  For
    // maximum flexibility, we will use the static factory pattern even
    // though it isn't warranted yet.

    // Reusing noc is safe since NatOrderComparator objects are immutable.
    private static NatOrderComparator noc = new NatOrderComparator();

    /**
     * No-arg constructor.
     */
    private NatOrderComparator() {
    }

    /**
     * Factory method to get a reference to a NatOrderComparator.
     * @return NatOrderComparator
     */
    public static NatOrderComparator getInstance() {
        return noc;
    }

    /**
     * Compares two Strings using a "natural" sorting order.
     *
     * @param s1 the first String to compare
     * @param s2 the second String to compare
     * @return {@literal 0 if s1 equals s2,
     *         <0 if s1 is naturally less than s2,
     *         >0 if s2 is naturally less than s1
     *         }
     * @throws NullPointerException if either argument is null.  This Exception
     * is required by the Comparator contract, not IllegalArgumentException.
     */

    @Override
    public int compare(final String s1, final String s2) {
        if ( s1 == null || s2 == null )
            throw new NullPointerException("s1: " + s1 + ", s2: " + s2);
        if ( s1.equals(s2))  // Includes zero-length Strings
            return 0;

        // Walk the length of the shorter String:
        if ( s1.length() == 0 )
            return -1;  // s2.length must be greater than 0
        if ( s2.length() == 0 )
            return 1;  // s1.length must be greater than 0
        final int len = Math.min(s1.length(), s2.length());

        // Walk through the String, one codepoint at a time:
        // (Since Java 8, you can do this with Streams:
        // s.codePoints().forEach(c -> ...);
        // for(int c : s.codePoints().toArray()) { ... }
        // But I don't think those methods are as efficient.)
        int cp1, cp2;
        int i1 = 0;
        while ( i1 < len ) {
            cp1 = s1.codePointAt(i1);
            cp2 = s2.codePointAt(i1);

            // Case one: neither is a digit:
            if ( ! Character.isDigit(cp1) && ! Character.isDigit(cp2) ) {
                if ( cp1 != cp2 )
                    return cp1 - cp2;
                i1 += Character.charCount(cp1);
            }
            // Case two: only one is numeric:
            else if ( Character.isDigit(cp1) != Character.isDigit(cp2) ) {
                return Character.isDigit(cp1) ? -1: 1;  // numbers < letters

            }
            // Case three: both are numeric:
            else {
                // Find longest run of digits in each String, and extract:
                int numStart = i1, i2 = i1;
                while ( Character.isDigit(cp1) && i1 < s1.length() ) {
                    i1 += Character.charCount(cp1);
                    if ( i1 >= s1.length() )
                        break;
                    cp1 = s1.codePointAt(i1);
                }
                while ( Character.isDigit(cp2) && i2 < s2.length() ) {
                    i2 += Character.charCount(cp2);
                    if ( i2 >= s2.length() )
                        break;
                    cp2 = s2.codePointAt(i2);
                }
                final String num1String = s1.substring(numStart, i1);
                final String num2String = s2.substring(numStart, i2);

                // Convert to int (or BigInteger) and compare numerically:
                try {
                    final int num1 = Integer.parseInt(num1String);
                    final int num2 = Integer.parseInt(num2String);
                    if ( num1 != num2 )
                        return num1 - num2;
                } catch (NumberFormatException nfe) {

                    // If numbers are too big for type int, use BigIntegers:
                    final BigInteger num1 = new BigInteger(num1String);
                    final BigInteger num2 = new BigInteger(num2String);
                    if ( ! num1.equals(num2))
                        return num1.subtract(num2).intValue();
                }
                // Handle leading zero when values are equal; shorter is less:
                if ( num1String.length() != num2String.length() )
                    return num1String.length() - num2String.length();
            }
        }

        // If we get to here, the Strings are the same unless one is longer:
        return s1.length() - s2.length();
    }
}