/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.request;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.filter.Filters;
import org.jdom2.input.JDOMParseException;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.wikipediacleaner.api.APIException;
import org.wikipediacleaner.api.constants.EnumWikipedia;
import org.wikipediacleaner.api.data.DataManager;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.utils.Configuration;
import org.wikipediacleaner.utils.ConfigurationValueBoolean;
/**
* MediaWiki API XML results.
*/
public abstract class ApiXmlResult extends BasicApiResult {
// ==========================================================================
// Configuration
// ==========================================================================
/**
 * Whether parsed XML answers are printed for debugging.
 * The value is refreshed by {@link #updateConfiguration()}.
 */
private static boolean DEBUG_XML = false;

/**
 * Reload the XML tracing flag from the current configuration
 * (boolean value {@code DEBUG_API}).
 */
public static void updateConfiguration() {
  DEBUG_XML = Configuration.getConfiguration().getBoolean(
      null, ConfigurationValueBoolean.DEBUG_API);
}
// ==========================================================================
// XML Results
// ==========================================================================
/**
 * Create an XML result handler; both arguments are handed to the parent class.
 *
 * @param wiki Wiki on which requests are made.
 * @param httpClient HTTP client for making requests.
 */
public ApiXmlResult(EnumWikipedia wiki, HttpClient httpClient) {
  super(wiki, httpClient);
}
/**
 * Give the format handled by this kind of result.
 *
 * @return Format of the XML result, i.e. {@link ApiRequest#FORMAT_XML}.
 */
@Override
public String getFormat() {
  return ApiRequest.FORMAT_XML;
}
/**
 * Delay in milliseconds before a new attempt after an HTTP, I/O or parsing failure.
 */
private static final long RETRY_DELAY_MILLIS = 30000L;

/**
 * Send a request to MediaWiki API and parse its XML answer.
 * <p>
 * The request is executed in a loop. When the HTTP status is not OK, or when
 * reading or parsing the answer fails, the attempt is repeated after a fixed
 * delay as long as the number of attempts made does not exceed {@code maxTry}
 * (so up to {@code maxTry + 1} requests may be sent). Errors reported by the
 * API itself are retried according to the {@link APIException} they produce.
 * The response stream and the connection are released before any delay.
 *
 * @param properties Properties defining the request.
 * @param maxTry Maximum number of tries.
 * @return Answer of MediaWiki API.
 * @throws JDOMParseException Declared in the signature; parsing failures are
 *         caught here and reported as {@link APIException}.
 * @throws APIException When the request fails and no further attempt is made,
 *         or when the thread is interrupted while waiting before a new attempt.
 */
protected Element getRoot(
    Map<String, String> properties,
    int maxTry)
    throws JDOMParseException, APIException {
  int attempt = 0;
  for (;;) {
    // An APIException manages its own waiting through waitForRetry():
    // the fixed delay only applies to the other kinds of failure.
    boolean pauseBeforeRetry = true;
    HttpMethod method = null;
    InputStream stream = null;
    try {
      // Executing HTTP method
      attempt++;
      method = createHttpMethod(properties);
      int statusCode = getHttpClient().executeMethod(method);

      // Accessing response: the body may be absent
      stream = method.getResponseBodyAsStream();
      if (stream != null) {
        stream = new BufferedInputStream(stream);
        Header contentEncoding = method.getResponseHeader("Content-Encoding");
        // Content coding names are case-insensitive
        if ((contentEncoding != null) &&
            "gzip".equalsIgnoreCase(contentEncoding.getValue())) {
          stream = new GZIPInputStream(stream);
        }
      }

      // Read the response
      if (statusCode == HttpStatus.SC_OK) {
        if (stream == null) {
          throw new IOException("No response body");
        }
        // NOTE(review): the parser runs with default settings; consider whether
        // DOCTYPE / external entity processing should be disabled.
        SAXBuilder sxb = new SAXBuilder();
        Document document = sxb.build(stream);
        traceDocument(document);
        Element root = document.getRootElement();
        checkForError(root);
        return root;
      }

      // Failed request: consume the remaining body, ignoring any problem doing so
      if (stream != null) {
        try {
          while (stream.read() >= 0) {
            // Discard
          }
        } catch (IOException ignored) {
          // The content of a failed response is not needed
        }
      }
      String message = "URL access returned " + HttpStatus.getStatusText(statusCode);
      log.error(message);
      if (attempt > maxTry) {
        log.warn("Error. Maximum attempts count reached.");
        // Thrown inside the try block: it goes through the APIException handler below
        throw new APIException(message);
      }
    } catch (JDOMException e) {
      log.error("JDOMException: " + e.getMessage());
      if (attempt > maxTry) {
        log.warn("Error. Maximum attempts count reached.");
        throw new APIException("Error parsing XML result", e);
      }
    } catch (IOException e) {
      log.error("IOException: " + e.getMessage());
      if (attempt > maxTry) {
        log.warn("Error. Maximum attempts count reached.");
        throw new APIException("Error accessing MediaWiki", e);
      }
    } catch (APIException e) {
      if (!e.shouldRetry() || (attempt > e.getMaxRetry())) {
        throw e;
      }
      e.waitForRetry();
      pauseBeforeRetry = false;
    } finally {
      if (stream != null) {
        try {
          stream.close();
        } catch (IOException e) {
          log.warn("Error closing stream");
        }
      }
      if (method != null) {
        method.releaseConnection();
      }
    }

    // Resources are released at this point, the delay does not hold a connection
    if (pauseBeforeRetry) {
      waitBeforeRetry();
    }
    log.warn("Error. Trying again");
  }
}

/**
 * Sleep for the fixed retry delay.
 *
 * @throws APIException If the thread is interrupted while sleeping; the
 *         interrupt status of the thread is restored before throwing.
 */
private static void waitBeforeRetry() throws APIException {
  try {
    Thread.sleep(RETRY_DELAY_MILLIS);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new APIException("Interrupted while waiting before retrying request", e);
  }
}
/**
 * Inspect an API answer for reported errors and warnings.
 * <p>
 * The first {@code /api/error} element found is logged and turned into an
 * {@link APIException} carrying its {@code code} attribute. When there is no
 * error, every element under {@code /api/warnings} is logged.
 *
 * @param root Document root; nothing is done when it is {@code null}.
 * @throws APIException If the answer contains an error element.
 */
protected void checkForError(Element root) throws APIException {
  if (root == null) {
    return;
  }
  XPathFactory xpathFactory = XPathFactory.instance();

  // Errors: only the first one is reported
  List<Element> errors = xpathFactory
      .compile("/api/error", Filters.element())
      .evaluate(root);
  if ((errors != null) && !errors.isEmpty()) {
    Element error = errors.get(0);
    String text =
        "Error reported: " +
        error.getAttributeValue("code") + " - " +
        error.getAttributeValue("info");
    log.warn(text);
    throw new APIException(text, error.getAttributeValue("code"));
  }

  // Warnings: all of them are logged
  List<Element> warnings = xpathFactory
      .compile("/api/warnings/*", Filters.element())
      .evaluate(root);
  if (warnings == null) {
    return;
  }
  for (Element warning : warnings) {
    log.warn("Warning reported: " + warning.getName() + " - " + warning.getValue());
  }
}
/**
 * Manage query-continue in request.
 * <p>
 * The nodes matching {@code queryContinue} are looked up first; when there are
 * none, the nodes matching {@code /api/continue} are used instead. Every
 * attribute of the selected nodes is copied into {@code properties}.
 *
 * @param root Root of the DOM tree.
 * @param queryContinue XPath query to the query-continue node.
 * @param properties Properties defining request; updated with the attributes found.
 * @return True if request should be continued, i.e. at least one attribute
 *         was copied into {@code properties}.
 */
protected boolean shouldContinue(
    Element root, String queryContinue,
    Map<String, String> properties) {
  if ((root == null) || (queryContinue == null)) {
    return false;
  }
  XPathExpression<Element> xpa = XPathFactory.instance().compile(
      queryContinue, Filters.element());
  List<Element> results = xpa.evaluate(root);
  if ((results == null) || (results.isEmpty())) {
    // Nothing at the requested location: fall back to the generic continue node
    xpa = XPathFactory.instance().compile(
        "/api/continue", Filters.element());
    results = xpa.evaluate(root);
  }
  if (results == null) {
    return false;
  }
  boolean result = false;
  for (Element currentNode : results) {
    List<Attribute> attributes = currentNode.getAttributes();
    if (attributes == null) {
      continue;
    }
    for (Attribute attribute : attributes) {
      properties.put(attribute.getName(), attribute.getValue());
      result = true;
    }
  }
  return result;
}
/**
 * Build or retrieve the page described by a page node of an API answer.
 * <p>
 * The attributes {@code title}, {@code pageid} and {@code lastrevid} identify
 * the page requested from {@link DataManager}; the attributes {@code ns},
 * {@code missing} and {@code redirect} then update it. A {@code pageid} that
 * is not a valid integer is treated as absent.
 *
 * @param wiki Wiki.
 * @param pageNode Page node.
 * @param knownPages Already known pages.
 * @param useDisambig True if disambiguation property should be used.
 * @return Page, or {@code null} when {@code pageNode} is {@code null}.
 */
protected static Page getPage(
    EnumWikipedia wiki,
    Element pageNode, List<Page> knownPages,
    boolean useDisambig) {
  if (pageNode == null) {
    return null;
  }

  // Identification of the page
  String title = pageNode.getAttributeValue("title");
  Integer pageId = null;
  Attribute pageIdAttr = pageNode.getAttribute("pageid");
  if (pageIdAttr != null) {
    try {
      pageId = Integer.valueOf(pageIdAttr.getValue());
    } catch (NumberFormatException e) {
      // Not a number: the page identifier stays unknown
    }
  }
  String revisionId = pageNode.getAttributeValue("lastrevid");
  Page page = DataManager.getPage(wiki, title, pageId, revisionId, knownPages);

  // Properties of the page
  page.setNamespace(pageNode.getAttributeValue("ns"));
  boolean missing = (pageNode.getAttribute("missing") != null);
  if (missing) {
    page.setExisting(Boolean.FALSE);
  } else if (pageId != null) {
    page.setExisting(Boolean.TRUE);
  }
  boolean redirect = (pageNode.getAttribute("redirect") != null);
  if (redirect) {
    page.isRedirect(true);
  }

  // Disambiguation status, only on request
  if (useDisambig) {
    boolean dabPage = false;
    Element pageProps = pageNode.getChild("pageprops");
    if (pageProps != null) {
      dabPage = (pageProps.getAttribute("disambiguation") != null);
    }
    page.setDisambiguationPage(Boolean.valueOf(dabPage));
  }
  return page;
}
/**
 * Formatter for XML output, using the pretty format.
 */
private static final XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());

/**
 * Trace a document contents on the standard output, between start and end
 * markers. Nothing is printed unless XML tracing is enabled.
 *
 * @param doc Document.
 */
private void traceDocument(Document doc) {
  if (!DEBUG_XML) {
    return;
  }
  try {
    System.out.println("********** START OF DOCUMENT **********");
    xmlOutputter.output(doc, System.out);
    System.out.println("********** END OF DOCUMENT **********");
  } catch (IOException e) {
    // Tracing is only a debugging aid: report the problem and carry on
    log.warn("Error tracing XML document: " + e.getMessage());
  }
}
}