package experiments.webclassify.firstexperiments;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
/**
 * SAX {@link ContentHandler} that collects the character data of a document
 * into a single string.
 *
 * <p>Only {@link #characters(char[], int, int)} contributes to the result;
 * every other callback is a deliberate no-op, so element names, attributes,
 * ignorable whitespace and processing instructions are discarded.
 *
 * <p>The internal buffer is never cleared: if the same instance receives the
 * events of several documents, their text is concatenated. The class performs
 * no synchronization of its own.
 */
public class ExtractWikipediaText implements ContentHandler {

    /** Accumulates every chunk of character data reported so far. */
    private final StringBuilder documentText = new StringBuilder();

    /** Creates a handler with an empty text buffer. */
    public ExtractWikipediaText() {
    }

    /**
     * Appends the reported slice of character data to the collected text.
     *
     * @param ch     array holding the characters
     * @param start  index of the first character to take from {@code ch}
     * @param length number of characters to take
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        // Copy the slice straight into the buffer; no temporary String needed.
        documentText.append(ch, start, length);
    }

    /** No-op: nothing is finalized at the end of a document. */
    @Override
    public void endDocument() throws SAXException {
    }

    /** No-op: element boundaries are not recorded. */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
    }

    /** No-op: namespace prefix mappings are ignored. */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /** No-op: ignorable whitespace is not added to the collected text. */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
    }

    /** No-op: processing instructions are ignored. */
    @Override
    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    /** No-op: the document locator is not retained. */
    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /** No-op: skipped entities are ignored. */
    @Override
    public void skippedEntity(String name) throws SAXException {
    }

    /** No-op: previously collected text is kept when a new document starts. */
    @Override
    public void startDocument() throws SAXException {
    }

    /** No-op: element names and attributes are not recorded. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
    }

    /** No-op: namespace prefix mappings are ignored. */
    @Override
    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    /**
     * Returns all character data collected so far.
     *
     * @return the concatenation of every slice passed to
     *         {@link #characters(char[], int, int)}, in call order; the empty
     *         string if none has been reported yet
     */
    public String getDocumentText() {
        return documentText.toString();
    }
}