package com.knowledgetree.textextraction; //import java.util.regex.*; import java.lang.Character; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.Locator; import org.xml.sax.SAXException; public class StringHandler implements ContentHandler { private String buffer; public StringHandler() { this.buffer = new String(); } public void characters(char[] ch, int start, int length) throws SAXException { for(int i=0;i<ch.length;i++) { // skip control characters in output, don't want these for // a text extraction and they cause issues with the xml response if (!Character.isISOControl(ch[i])) { this.buffer += ch[i]; } } } public void endDocument() throws SAXException { // TODO Auto-generated method stub } public void endElement(String uri, String localName, String name) throws SAXException { // TODO Auto-generated method stub this.buffer += " "; } public void endPrefixMapping(String prefix) throws SAXException { // TODO Auto-generated method stub } public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { // TODO Auto-generated method stub } public void processingInstruction(String target, String data) throws SAXException { // TODO Auto-generated method stub } public void setDocumentLocator(Locator locator) { // TODO Auto-generated method stub } public void skippedEntity(String name) throws SAXException { // TODO Auto-generated method stub } public void startDocument() throws SAXException { // TODO Auto-generated method stub } public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException { // TODO Auto-generated method stub /* For now we will store the data in a private buffer */ int l = atts.getLength(); for(int i=0;i<l;i++) { this.buffer += " " + atts.getValue(i) ; } } public void startPrefixMapping(String prefix, String uri) throws SAXException { // TODO Auto-generated method stub } public String getString() { return this.buffer; } }