// UseDOM.java. This program parses an XML file and produces HTML
// output. The URI of the XML file (ex: "TechTipArchive.xml")
// and the output file name (ex: "ListTipsByAuthor.htm") must be
// provided on the command line.
// The sample xml file contains XML elements of authors and tips.
// The program reads the XML file, builds a DOM tree, and gets all
// the author elements (and saves them in a HashMap).
// An output DOM is created with an HTML element at the root. After
// adding a title, the first author is added as an HTML "H2" element,
// then the input DOM is searched for all tip elements that have that
// author as an attribute. Each such tip is then added to the output
// DOM as an HTML link ("A HREF" tag).
// After processing all the tips for one author, the next author's
// tips are added to the output DOM in the same way.
// If all we wanted was to output a modified XML document, we'd be done.
// (By adding XSL styles, this is not a bad solution!) But to output
// HTML, each node (element) in the output DOM must be printed as
// HTML. Since DOM elements contain other elements, this is a
// recursive process. Using Sun's "JAXP" parser, you can simply
// call an element's "toString()" method to convert it to an HTML
// tag. Sadly this is not standard (yet?), so you must write your
// own "print" method to do this. The print method used here is a
// modified version of DOMWriter.java sample code that comes with
// xerces-j. (The original outputs XML not HTML.)
// Note that print uses a "normalize" method, to replace reserved
// symbols in the text such as "<" and "&" with character entities
// such as "&lt;" and "&amp;".
// Adapted by Wayne Pollock, Tampa Florida USA, 2/2001,
// from an article appearing on Sun's JDC (Java Developer Connection).
// (See: http://developer.java.sun.com/.)
import java.io.*;
import java.net.*;
import java.util.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * Reads an XML document containing "author" and "tip" elements and builds
 * an HTML page that lists every tip grouped under its author.
 *
 * Typical use: call createHTMLDoc once to create the skeleton output DOM,
 * call processWithDOM to fill it from an XML source, then call toString
 * to serialize the output DOM as HTML text.
 */
public class UseDOM
{
    /** The output DOM; null until createHTMLDoc has been called. */
    private Document outputDoc;

    /** The BODY element of the output DOM; author sections are appended here. */
    private Element body;

    /** The root HTML element of the output DOM. */
    private Element html;

    /** Buffer that print appends the serialized markup to. */
    private StringBuffer out;

    /** Maps an author id (String) to that author's OL element in the output DOM. */
    private HashMap authors = new HashMap();

    /**
     * Serializes the output DOM as HTML text.
     *
     * @return the HTML markup, or Object's default string if createHTMLDoc
     *         has not been called yet.
     */
    public String toString()
    {
        if ( html == null ) {
            return super.toString();
        }
        out = new StringBuffer();
        print( html );
        return out.toString();
    }

    /**
     * Parses the XML document at the given URI and adds one H2 heading plus
     * one OL list per author to the output DOM, then adds each tip as an
     * LI/A link inside the list of the author it refers to.
     *
     * @param urlString URI of the XML document to read.
     * @throws IllegalStateException if createHTMLDoc has not been called.
     * @throws Exception if the parser cannot be created or the document
     *         cannot be read or parsed.
     */
    public void processWithDOM ( String urlString ) throws Exception
    {
        if ( outputDoc == null ) {
            throw new IllegalStateException(
                "createHTMLDoc must be called before processWithDOM" );
        }
        System.out.println( "Processing URL " + urlString );

        // Create a new DOM parser object, by first creating a factory object.
        // NOTE(review): the factory is used with its default settings, so
        // DTDs and external entities in the input are processed; harden the
        // factory if the input can come from an untrusted source.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();

        // Create the DOM (a "Document" object) and get its root element:
        Document doc = db.parse( urlString );
        Element elem = doc.getDocumentElement();

        // Get a list of all "author" tags in the document:
        NodeList authorList = elem.getElementsByTagName( "author" );
        for ( int i = 0; i < authorList.getLength(); ++i ) {
            // Author tags have two attributes (and no data), id and fullName.
            Element author = (Element) authorList.item( i );
            String id = author.getAttribute( "id" );
            String fullName = author.getAttribute( "fullName" );

            // Create an HTML "H2" tag, followed by the author's name as text:
            Element h2 = outputDoc.createElement( "H2" );
            body.appendChild( h2 );
            h2.appendChild( outputDoc.createTextNode( "by " + fullName ) );

            // For each author we will add a list of tips, so create an HTML
            // "OL" element and add it to the output DOM:
            Element list = outputDoc.createElement( "OL" );
            body.appendChild( list );

            // Remember the (still empty) OL list under the author's id:
            authors.put( id, list );
        }

        // Now build a list of all "tip" elements from the input:
        NodeList tipsList = elem.getElementsByTagName( "tip" );
        for ( int i = 0; i < tipsList.getLength(); ++i ) {
            // Fetch the tip and get its attributes:
            Element tip = (Element) tipsList.item( i );
            String title = tip.getAttribute( "title" );
            String htmlURL = tip.getAttribute( "htmlURL" );
            String author = tip.getAttribute( "author" );

            // Look up the OL list for this tip's author.  A tip that names
            // an author id with no matching "author" element has no list to
            // go into, so report it and carry on instead of failing.
            Node list = (Node) authors.get( author );
            if ( list == null ) {
                System.err.println( "Skipping tip \"" + title
                    + "\": unknown author id \"" + author + "\"" );
                continue;
            }
            Node item = list.appendChild( outputDoc.createElement( "LI" ) );

            // Each tip uses the title as the link text:
            Element link = outputDoc.createElement( "A" );
            item.appendChild( link );
            link.appendChild( outputDoc.createTextNode( title ) );
            link.setAttribute( "HREF", htmlURL );
        }
    }

    /**
     * Builds (creates) the initial DOM for the output: an HTML root element
     * containing a BODY element that starts with an H1 heading.
     *
     * @param heading text placed inside the H1 element.
     * @throws ParserConfigurationException if a DocumentBuilder cannot be
     *         created.
     */
    public void createHTMLDoc ( String heading )
        throws ParserConfigurationException
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        outputDoc = db.newDocument();

        html = outputDoc.createElement( "HTML" );
        outputDoc.appendChild( html );

        body = outputDoc.createElement( "BODY" );
        html.appendChild( body );

        Element h1 = outputDoc.createElement( "H1" );
        body.appendChild( h1 );
        h1.appendChild( outputDoc.createTextNode( heading ) );
    }

    /**
     * Appends the markup for the specified node, recursively, to the
     * internal output buffer.  Node types not handled below produce no
     * output.
     *
     * @param node the node to print; null is ignored.
     */
    public void print ( Node node )
    {
        // is there anything to do?
        if ( node == null ) {
            return;
        }
        // Allow print to be called directly, before toString has created
        // the buffer.
        if ( out == null ) {
            out = new StringBuffer();
        }

        int type = node.getNodeType();
        switch ( type ) {
        case Node.DOCUMENT_NODE: {  // print document
            NodeList children = node.getChildNodes();
            for ( int iChild = 0; iChild < children.getLength(); ++iChild ) {
                print( children.item( iChild ) );
            }
            break;
        }

        case Node.ELEMENT_NODE: {  // print element with attributes
            out.append( '<' );
            out.append( node.getNodeName() );
            Attr attrs[] = sortAttributes( node.getAttributes() );
            for ( int i = 0; i < attrs.length; ++i ) {
                Attr attr = attrs[i];
                out.append( ' ' );
                out.append( attr.getNodeName() );
                out.append( "=\"" );
                out.append( normalize( attr.getNodeValue() ) );
                out.append( '"' );
            }
            out.append( '>' );
            NodeList children = node.getChildNodes();
            if ( children != null ) {
                int len = children.getLength();
                for ( int i = 0; i < len; ++i ) {
                    print( children.item( i ) );
                }
            }
            break;
        }

        case Node.ENTITY_REFERENCE_NODE: {  // handle entity reference nodes
            out.append( '&' );
            out.append( node.getNodeName() );
            out.append( ';' );
            break;
        }

        case Node.CDATA_SECTION_NODE: {  // print cdata sections
            // CDATA content is written verbatim, without entity escaping.
            out.append( "<![CDATA[" );
            out.append( node.getNodeValue() );
            out.append( "]]>" );
            break;
        }

        case Node.TEXT_NODE: {  // print text
            out.append( normalize( node.getNodeValue() ) );
            break;
        }

        case Node.PROCESSING_INSTRUCTION_NODE: {  // print processing instruction
            out.append( "<?" );
            out.append( node.getNodeName() );
            String data = node.getNodeValue();
            if ( data != null && data.length() > 0 ) {
                out.append( ' ' );
                out.append( data );
            }
            out.append( "?>\n" );  // on DOS systems should use "\r\n".
            break;
        }

        default:
            break;
        }  // End of switch

        // Elements need a closing tag after their children were printed:
        if ( type == Node.ELEMENT_NODE ) {
            out.append( "</" );
            out.append( node.getNodeName() );
            out.append( '>' );
        }
    }  // print(Node)

    /**
     * Returns the attributes of the given map as an array sorted by
     * attribute name.
     *
     * @param attrs the attribute map; may be null.
     * @return a new array of the attributes in ascending name order;
     *         empty when attrs is null or has no entries.
     */
    protected Attr [] sortAttributes ( NamedNodeMap attrs )
    {
        int len = (attrs != null) ? attrs.getLength() : 0;
        Attr array[] = new Attr[len];
        for ( int i = 0; i < len; ++i ) {
            array[i] = (Attr) attrs.item( i );
        }
        // Raw Comparator: this file is written without generics.
        Arrays.sort( array, new Comparator() {
            public int compare ( Object a, Object b )
            {
                String nameA = ((Attr) a).getNodeName();
                String nameB = ((Attr) b).getNodeName();
                return nameA.compareTo( nameB );
            }
        } );
        return array;
    }  // sortAttributes(NamedNodeMap):Attr[]

    /**
     * Normalizes the given string for use in markup: the reserved
     * characters &lt;, &gt;, &amp; and the double quote are replaced with
     * character entities, and carriage returns are dropped.
     *
     * @param s the text to normalize; may be null.
     * @return the normalized text; the empty string when s is null.
     */
    protected String normalize ( String s )
    {
        StringBuffer str = new StringBuffer();
        int len = (s != null) ? s.length() : 0;
        for ( int i = 0; i < len; ++i ) {
            char ch = s.charAt( i );
            switch ( ch ) {
            case '<':
                str.append( "&lt;" );
                break;
            case '>':
                str.append( "&gt;" );
                break;
            case '&':
                str.append( "&amp;" );
                break;
            case '"':
                str.append( "&quot;" );
                break;
            case '\r':
                break;  // replace "\r\n" with "\n" by dropping the "\r"
            case '\n':  // newlines are copied unchanged, like any other char
            default:
                str.append( ch );
                break;
            }  // End of switch.
        }
        return str.toString();
    }  // normalize(String):String

    /**
     * Command-line entry point.
     *
     * @param args args[0] is the URI of the XML input, args[1] is the name
     *        of the HTML file to write.
     */
    public static void main ( String [] args )
    {
        if ( args.length < 2 ) {
            System.err.println( "Usage: java UseDOM <xml-URI> <output-file>" );
            return;
        }
        try {
            UseDOM ud = new UseDOM();
            ud.createHTMLDoc( "JDC Tech Tips Archive" );
            ud.processWithDOM( args[0] );
            String htmlOut = ud.toString();
            System.out.println( "Saving result to " + args[1] );
            FileWriter fw = new FileWriter( args[1] );
            try {
                fw.write( htmlOut, 0, htmlOut.length() );
            } finally {
                // Closing flushes the writer; without it the output file
                // may be left empty or truncated.
                fw.close();
            }
        } catch ( Throwable t ) {
            t.printStackTrace();
        }
    }
}