/home/wpollock1/public_html/AJava/NatOrderComparator/NatOrderComparator.java
package com.wpollock.natordercomp;
import java.math.BigInteger;
import java.util.Comparator;
/**
* A natural order String Comparator.
* Natural order means to compare Strings character for character,
* except when there are runs of digits in the String. Each run of
* digits is compared numerically, not lexically (so "2" < "10").
* <p>
* If two runs of digits compare equal numerically, the shorter number
* is less than the longer one (so "3" < "03").
* <p>
* The general algorithm is inspired by
* <a href="https://en.wikipedia.org/wiki/Natural_sort_order">Natural
* Sort Order</a> on Wikipedia.
* Compare two Strings codepoint by codepoint, unless a digit is
* encountered. If the other String's codepoint is not a digit,
* the String with the digit is less.
* If both are numeric, for each String read all the digits and convert to
* a number. The smaller value is less; if values
* are the same, shorter length is less (to account for leading zeroes). If
* the numbers are the same, continue with the next codepoint If everything
* is the same, the shorter String is less. Otherwise, they are equal.
* <p>
* For this version, no options are provided (such as ignoring leading zeros
* or spaces, case-insensitive matching, or Unicode normalization). All
* such steps are assumed to be done by the caller.
*
* @version 1.0 (12/30/2015)
* @author Wayne Pollock, Tampa Florida USA
*
*/
public class NatOrderComparator implements Comparator<String> {
/*
* While the code strives for a clean, elegant solution, efficiency in a
* Comparator is important, so this version avoids using regular expressions
* and tries to avoid creating unnecessary objects.
*/
// Someday, there should be versions with args for case-insensitive,
// ignore whitespace, and Unicode normalization & sanitization. For
// maximum flexibility, we will use the static factory pattern even
// though it isn't warranted yet.
// Reusing noc is safe since NatOrderComparator objects are immutable.
private static NatOrderComparator noc = new NatOrderComparator();
/**
* No-arg constructor.
*/
private NatOrderComparator() {
}
/**
* Factory method to get a reference to a NatOrderComparator.
* @return NatOrderComparator
*/
public static NatOrderComparator getInstance() {
return noc;
}
/**
* Compares two Strings using a "natural" sorting order.
*
* @param s1 the first String to compare
* @param s2 the second String to compare
* @return {@literal 0 if s1 equals s2,
* <0 if s1 is naturally less than s2,
* >0 if s2 is naturally less than s1
* }
* @throws NullPointerException if either argument is null. This Exception
* is required by the Comparator contract, not IllegalArgumentException.
*/
@Override
public int compare(final String s1, final String s2) {
if ( s1 == null || s2 == null )
throw new NullPointerException("s1: " + s1 + ", s2: " + s2);
if ( s1.equals(s2)) // Includes zero-length Strings
return 0;
// Walk the length of the shorter String:
if ( s1.length() == 0 )
return -1; // s2.length must be greater than 0
if ( s2.length() == 0 )
return 1; // s1.length must be greater than 0
final int len = Math.min(s1.length(), s2.length());
// Walk through the String, one codepoint at a time:
// (Since Java 8, you can do this with Streams:
// s.codePoints().forEach(c -> ...);
// for(int c : s.codePoints().toArray()) { ... }
// But I don't think those methods are as efficient.)
int cp1, cp2;
int i1 = 0;
while ( i1 < len ) {
cp1 = s1.codePointAt(i1);
cp2 = s2.codePointAt(i1);
// Case one: neither is a digit:
if ( ! Character.isDigit(cp1) && ! Character.isDigit(cp2) ) {
if ( cp1 != cp2 )
return cp1 - cp2;
i1 += Character.charCount(cp1);
}
// Case two: only one is numeric:
else if ( Character.isDigit(cp1) != Character.isDigit(cp2) ) {
return Character.isDigit(cp1) ? -1: 1; // numbers < letters
}
// Case three: both are numeric:
else {
// Find longest run of digits in each String, and extract:
int numStart = i1, i2 = i1;
while ( Character.isDigit(cp1) && i1 < s1.length() ) {
i1 += Character.charCount(cp1);
if ( i1 >= s1.length() )
break;
cp1 = s1.codePointAt(i1);
}
while ( Character.isDigit(cp2) && i2 < s2.length() ) {
i2 += Character.charCount(cp2);
if ( i2 >= s2.length() )
break;
cp2 = s2.codePointAt(i2);
}
final String num1String = s1.substring(numStart, i1);
final String num2String = s2.substring(numStart, i2);
// Convert to int (or BigInteger) and compare numerically:
try {
final int num1 = Integer.parseInt(num1String);
final int num2 = Integer.parseInt(num2String);
if ( num1 != num2 )
return num1 - num2;
} catch (NumberFormatException nfe) {
// If numbers are too big for type int, use BigIntegers:
final BigInteger num1 = new BigInteger(num1String);
final BigInteger num2 = new BigInteger(num2String);
if ( ! num1.equals(num2))
return num1.subtract(num2).intValue();
}
// Handle leading zero when values are equal; shorter is less:
if ( num1String.length() != num2String.length() )
return num1String.length() - num2String.length();
}
}
// If we get to here, the Strings are the same unless one is longer:
return s1.length() - s2.length();
}
}