XmlUtils.java

/*
 * XmlUtils
 *
 * $Id$
 * $HeadURL$
 */
package gov.usgs.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TimeZone;

import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.datatype.DatatypeFactory;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Xml parsing utility functions.
 *
 * @author jmfee
 *
 */
public class XmlUtils {

  /** Hashmap of ESCAPES */
  public static final Map<String, String> ESCAPES = new HashMap<String, String>();
  static {
    // xml
    ESCAPES.put("&", "&amp;");
    ESCAPES.put("<", "&lt;");
    ESCAPES.put(">", "&gt;");
    ESCAPES.put("\"", "&quot;");
    ESCAPES.put("'", "&apos;");
    // whitespace characters
    ESCAPES.put("\t", "&#x9;"); // tab
    ESCAPES.put("\n", "&#xA;"); // newline
    ESCAPES.put("\r", "&#xD;"); // carriage return
  }

  /**
   * Convenience method to format a Date as an XML DateTime String.
   *
   * @param date the date to format.
   * @return the XML representation as a string.
   */
  public static String formatDate(final Date date) {
    if (date == null) {
      return null;
    }
    GregorianCalendar calendar = new GregorianCalendar();
    calendar.setTimeInMillis(date.getTime());
    return formatGregorianCalendar(calendar);
  }

  /**
   * Format a Gregorian Calendar as an XML DateTime String.
   *
   * @param calendar the calendar to format.
   * @return the XML representation as a string.
   */
  public static String formatGregorianCalendar(final GregorianCalendar calendar) {
    try {
      return DatatypeFactory.newInstance().newXMLGregorianCalendar(calendar).normalize().toXMLFormat();
    } catch (Exception e) {
      return null;
    }
  }

  /**
   * Convenience method to parse an XML Date Time into a Date. Only useful when
   * the XML Date Time is within the Date object time range.
   *
   * @param toParse the xml date time string to parse.
   * @return the parsed Date object.
   */
  public static Date getDate(final String toParse) {
    XMLGregorianCalendar calendar = getXMLGregorianCalendar(toParse);
    if (calendar != null) {
      return new Date(calendar.toGregorianCalendar().getTimeInMillis());
    } else {
      return null;
    }
  }

  /**
   * Parse an XML Date Time into an XMLGregorianCalendar.
   *
   * @param toParse the xml date time string to parse.
   * @return the parsed XMLGregorianCalendar object.
   */
  public static XMLGregorianCalendar getXMLGregorianCalendar(final String toParse) {
    try {
      return DatatypeFactory.newInstance().newXMLGregorianCalendar(toParse);
    } catch (Exception e) {
      return null;
    }
  }

  /**
   * Converts an XMLGregorianCalendar to a Date.
   *
   * @param xmlDate XMLGregorianCalendar to convert.
   * @return corresponding date object.
   */
  public static Date getDate(final XMLGregorianCalendar xmlDate) {
    // is this equivalent to getDate(String) processing above??

    // start with UTC, i.e. no daylight savings time.
    TimeZone timezone = TimeZone.getTimeZone("GMT");

    // adjust timezone to match xmldate
    int offsetMinutes = xmlDate.getTimezone();
    if (offsetMinutes != DatatypeConstants.FIELD_UNDEFINED) {
      timezone.setRawOffset(
          // convert minutes to milliseconds
          offsetMinutes * 60 // seconds per minute
              * 1000 // milliseconds per second
      );
    }

    // use calendar so parsed date will be UTC
    Calendar calendar = Calendar.getInstance(timezone);
    calendar.clear();
    calendar.set(xmlDate.getYear(),
        // xmlcalendar is 1 based, calender is 0 based
        xmlDate.getMonth() - 1, xmlDate.getDay(), xmlDate.getHour(), xmlDate.getMinute(), xmlDate.getSecond());
    Date date = calendar.getTime();
    int millis = xmlDate.getMillisecond();
    if (millis != DatatypeConstants.FIELD_UNDEFINED) {
      calendar.setTimeInMillis(calendar.getTimeInMillis() + millis);
    }

    return date;
  }

  /**
   * Creates an XMLReader and uses handler as a content and error handler.
   *
   * @param xml     source of xml.
   * @param handler SAX handler for xml.
   * @throws SAXException                 if any exceptions occur during parsing.
   * @throws IOException                  if unable to convert xml to an
   *                                      inputstream.
   * @throws ParserConfigurationException if unable to create a namespace aware
   *                                      parser.
   */
  public static void parse(final Object xml, final DefaultHandler handler)
      throws SAXException, IOException, ParserConfigurationException {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser sp = spf.newSAXParser();
    XMLReader xr = sp.getXMLReader();
    xr.setContentHandler(handler);
    xr.setErrorHandler(handler);
    InputStream in = StreamUtils.getInputStream(xml);
    try {
      xr.parse(new InputSource(in));
    } finally {
      StreamUtils.closeStream(in);
    }
  }

  /**
   * Sometimes parsers do not preserve the namespace for attributes. This attempts
   * to use the namespace and localname, and, if not available using the
   * namespace, checks for an attribute using only localname.
   *
   * @param attributes Attributes object to search.
   * @param uri        namespace of attribute.
   * @param localName  local name of attribute.
   * @return value of attribute.
   */
  public static String getAttribute(final Attributes attributes, final String uri, final String localName) {
    String value = attributes.getValue(uri, localName);
    if (value == null) {
      value = attributes.getValue(localName);
    }
    return value;
  }

  /**
   * Escape a value when writing XML.
   *
   * Replaces each character in the ESCAPES map with its escaped value.
   *
   * This method should only be used when generating xml manually, since most xml
   * writers escape automatically.
   *
   * @param value the value to escape
   * @return the escaped value.
   */
  public static String escape(final String value) {
    String escapedValue = value;

    // replace each escapeable character
    Iterator<String> iter = ESCAPES.keySet().iterator();
    while (iter.hasNext()) {
      String raw = iter.next();
      String escaped = ESCAPES.get(raw);
      escapedValue = escapedValue.replace(raw, escaped);
    }

    return escapedValue;
  }

  /**
   * Unescape a value when reading XML.
   *
   * Replaces each escaped character in the ESCAPES map with its unescaped value.
   *
   * This method should only be used when parsing xml manually, since most xml
   * parsers unescape automatically.
   *
   * @param value the value to unescape
   * @return the unescaped value.
   */
  public static String unescape(final String value) {
    String unescapedValue = value;

    // replace each escapeable character
    Iterator<String> iter = ESCAPES.keySet().iterator();
    while (iter.hasNext()) {
      String raw = iter.next();
      String escaped = ESCAPES.get(raw);
      unescapedValue = unescapedValue.replace(escaped, raw);
    }

    return unescapedValue;
  }
}