/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2016 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.dump;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler for wiki dumps.
 *
 * <p>Tracks whether the parser is currently inside a {@code page} element
 * and, while it is, forwards the SAX events to a {@link PageHandler}.
 * Events received outside of a {@code page} element are ignored.</p>
 */
public class DumpHandler extends DefaultHandler {

  /** Qualified name of the element that delimits one page (matched ignoring case). */
  private static final String PAGE_ELEMENT = "page";

  /** Internal flag: {@code true} while parsing is inside a page element. */
  public boolean isInPage;

  /** Handler receiving the SAX events that occur inside a page element. */
  public PageHandler pageHandler;

  /**
   * Creates a handler that starts outside of any page and owns a fresh
   * {@link PageHandler}.
   */
  public DumpHandler() {
    this.isInPage = false;
    this.pageHandler = new PageHandler();
  }

  /**
   * Passes the page processor on to the page handler.
   * Nothing happens when there is no page handler.
   *
   * @param processor Page processor.
   */
  public void setPageProcessor(PageProcessor processor) {
    if (pageHandler == null) {
      return;
    }
    pageHandler.setPageProcessor(processor);
  }

  /**
   * Receive notification of the start of an element.
   *
   * <p>An element whose qualified name is {@code page} (ignoring case)
   * switches the handler into page mode. While in page mode, including for
   * that opening element itself, the event is forwarded to the page handler
   * if there is one.</p>
   *
   * @param uri The Namespace URI, forwarded untouched.
   * @param localName The local name (without prefix), forwarded untouched.
   * @param qName The qualified name (with prefix), used to detect the page element.
   * @param attributes The attributes attached to the element, forwarded untouched.
   * @exception org.xml.sax.SAXException Any SAX exception raised by the page handler.
   * @see org.xml.sax.ContentHandler#startElement
   */
  @Override
  public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    if (!isInPage) {
      // Outside of a page: only the opening page element is of interest.
      if (!qName.equalsIgnoreCase(PAGE_ELEMENT)) {
        return;
      }
      isInPage = true;
    }
    if (pageHandler == null) {
      return;
    }
    pageHandler.startElement(uri, localName, qName, attributes);
  }

  /**
   * Receive notification of the end of an element.
   *
   * <p>While in page mode the event is first forwarded to the page handler
   * if there is one; then, if the element's qualified name is {@code page}
   * (ignoring case), the handler leaves page mode. Outside of page mode the
   * event is ignored.</p>
   *
   * @param uri The Namespace URI, forwarded untouched.
   * @param localName The local name (without prefix), forwarded untouched.
   * @param qName The qualified name (with prefix), used to detect the page element.
   * @exception org.xml.sax.SAXException Any SAX exception raised by the page handler.
   * @see org.xml.sax.ContentHandler#endElement
   */
  @Override
  public void endElement(String uri, String localName, String qName) throws SAXException {
    if (!isInPage) {
      return;
    }
    if (pageHandler != null) {
      pageHandler.endElement(uri, localName, qName);
    }
    // The flag is a public field, so it is read again after delegating
    // instead of assuming it still holds the value seen above.
    if (isInPage && qName.equalsIgnoreCase(PAGE_ELEMENT)) {
      isInPage = false;
    }
  }

  /**
   * Receive notification of character data inside an element.
   *
   * <p>The characters are forwarded to the page handler only while in page
   * mode and when there is a page handler; otherwise they are dropped.</p>
   *
   * @param ch The characters.
   * @param start The start position in the character array.
   * @param length The number of characters to use from the character array.
   * @exception org.xml.sax.SAXException Any SAX exception raised by the page handler.
   * @see org.xml.sax.ContentHandler#characters
   */
  @Override
  public void characters(char[] ch, int start, int length) throws SAXException {
    if (!isInPage || pageHandler == null) {
      return;
    }
    pageHandler.characters(ch, start, length);
  }
}