// /home/wpollock1/public_html/AJava/BreakIterDemo.java

// BreakIterator demo
// Shows a correct way to process a String one Unicode character
// at a time.  This is difficult, as multi-char characters may
// be in the input either by accident, by non-English users,
// or by attackers attempting to break your software.
//
// Written 6/2012 by Wayne Pollock, Tampa Florida USA
// Inspired by the BreakIterator Java docs, and the incorrect code
// at <https://www.securecoding.cert.org/confluence/display/java/
// IDS10-J.+Do+not+split+characters+between+two+data+structures#IDS10-JDonotsplit
// charactersbetweentwodatastructures-CompliantSolutionSubstring>

import java.text.*;

class BreakIterDemo
{
  // Entry point.  For every command-line argument, prints one line
  // holding the argument, the separator " --> ", and the leading run
  // of letters found in that argument.
  public static void main ( String [] args ) {
    for ( int i = 0; i < args.length; ++i ) {
      final String arg = args[i];
      final String leading = getLeadingText( arg );
      System.out.println( arg + " --> " + leading );
    }
  }

  // Returns the longest prefix of source in which every character
  // (as delimited by a character-instance BreakIterator) begins with
  // a code point that Character.isLetter accepts.  If every character
  // qualifies, including when source is empty, source itself is
  // returned.
  private static String getLeadingText( String source ) {
    final BreakIterator boundaries = BreakIterator.getCharacterInstance();
    boundaries.setText( source );

    // "from" is the index where the current character begins and
    // "to" is the boundary just past it; a character may occupy
    // more than one char, so both positions are tracked.
    int from = boundaries.first();
    int to = boundaries.next();

    while ( to != BreakIterator.DONE ) {
      // Only the first code point of the character is examined.
      final int codePoint = source.codePointAt( from );
      if ( ! Character.isLetter( codePoint ) ) {
        // First non-letter: everything before it is the answer.
        return source.substring( 0, from );
      }
      from = to;
      to = boundaries.next();
    }

    // Ran off the end without meeting a non-letter.
    return source;
  }
}