package org.gbif.dwca.io;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * A simple SAX base handler for metadata documents that collects the character data read inside elements and
 * keeps track of the path of currently open elements.
 *
 * <p>The character buffer is cleared on every element start. When an element ends, the trimmed and
 * whitespace-normalised buffer is stored in {@link #content}, or {@code null} if nothing but whitespace was
 * read. The buffer is <em>not</em> cleared on element end, so character data that follows a closed child
 * element is appended to the text of that child.
 *
 * <p>{@link #parents} holds the lower-cased local names of all open elements, each prefixed with a slash,
 * for example {@code /eml/dataset/title}.
 *
 * <p>Make sure to call the super methods when overriding any handler method of this class!
 *
 * @author markus
 */
public abstract class SimpleSaxHandler extends DefaultHandler {

  /** Matches any run of whitespace; compiled once and shared because {@link Pattern} is immutable. */
  private static final Pattern NORM_WHITESPACE = Pattern.compile("\\s+");

  protected final Logger log = LoggerFactory.getLogger(getClass());

  /** Normalised text of the element that ended last, or {@code null} if it had no non-whitespace text. */
  protected String content;

  /** Slash-separated path of the lower-cased local names of all currently open elements. */
  protected String parents = "";

  /** Raw character data collected since the most recent element start; never {@code null}. */
  private final StringBuilder chars = new StringBuilder();

  /**
   * Appends the reported character data to the internal buffer.
   *
   * @param ch     the characters
   * @param start  the start position in the character array
   * @param length the number of characters to use from the character array
   */
  @Override
  public void characters(char[] ch, int start, int length) {
    chars.append(ch, start, length);
  }

  /**
   * Stores the normalised buffer content in {@link #content} and removes the last path segment from
   * {@link #parents}. After this call {@link #parents} no longer contains the element that just ended.
   *
   * @param uri       the namespace URI, not used here
   * @param localName the local name of the ending element, not used here
   * @param qName     the qualified name, not used here
   * @throws SAXException never thrown here, declared so that subclasses may throw it
   */
  @Override
  public void endElement(String uri, String localName, String qName) throws SAXException {
    content = StringUtils.trimToNull(chars.toString());
    // collapse every run of whitespace (including line breaks) into a single space
    if (content != null) {
      content = NORM_WHITESPACE.matcher(content).replaceAll(" ");
    }
    // Cut at the last separator instead of subtracting the name length: the segment was stored lower-cased,
    // which is not guaranteed to have the same length as localName, and the path may already be empty.
    int lastSeparator = parents.lastIndexOf('/');
    parents = lastSeparator < 0 ? "" : parents.substring(0, lastSeparator);
  }

  /**
   * Resets the element path at the beginning of a document.
   */
  @Override
  public void startDocument() {
    parents = "";
  }

  /**
   * Clears the character buffer and appends the lower-cased local name of the new element to
   * {@link #parents}.
   *
   * @param uri        the namespace URI, not used here
   * @param localName  the local name of the starting element
   * @param qName      the qualified name, not used here
   * @param attributes the element attributes, not used here
   * @throws SAXException never thrown here, declared so that subclasses may throw it
   */
  @Override
  public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    chars.setLength(0);
    // Locale.ROOT keeps the path independent of the JVM default locale (e.g. Turkish dotless i).
    parents += "/" + localName.toLowerCase(Locale.ROOT);
  }

  /**
   * @return the simple class name of the concrete handler
   */
  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}