/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.request.query.prop; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.httpclient.HttpClient; import org.jdom2.Attribute; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.filter.Filters; import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathFactory; import org.wikipediacleaner.api.APIException; import org.wikipediacleaner.api.constants.EnumWikipedia; import org.wikipediacleaner.api.data.DataManager; import org.wikipediacleaner.api.data.Page; import org.wikipediacleaner.api.request.ApiRequest; import org.wikipediacleaner.api.request.ApiXmlResult; /** * MediaWiki API XML results for properties. */ public class ApiXmlPropertiesResult extends ApiXmlResult implements ApiPropertiesResult { /** * @param wiki Wiki on which requests are made. * @param httpClient HTTP client for making requests. */ public ApiXmlPropertiesResult( EnumWikipedia wiki, HttpClient httpClient) { super(wiki, httpClient); } /** * Update page information. * * @param node Element for the page. * @param page Page. * @throws JDOMException */ public void updatePageInformation(Element node, Page page) throws JDOMException { // Retrieve basic page information Attribute attrPageId = node.getAttribute("pageid"); if (attrPageId != null) { page.setPageId(attrPageId.getValue()); } Attribute attrTitle = node.getAttribute("title"); if (attrTitle != null) { page.setTitle(attrTitle.getValue()); } page.setStartTimestamp(node.getAttributeValue("starttimestamp")); Attribute attrRedirect = node.getAttribute("redirect"); if (attrRedirect != null) { page.isRedirect(true); } Attribute attrMissing = node.getAttribute("missing"); if (attrMissing != null) { page.setExisting(Boolean.FALSE); } // Retrieve protection information XPathExpression<Element> xpaProtection = XPathFactory.instance().compile( "protection/pr[@type=\"edit\"]", Filters.element()); List<Element> protectionNodes = xpaProtection.evaluate(node); for (Element protectionNode : protectionNodes) { if ("edit".equals(protectionNode.getAttributeValue("type"))) { page.setEditProtectionLevel(protectionNode.getAttributeValue("level")); } } } /** * Execute redirect request. * * @param properties Properties defining request. * @param pages Pages to be filled with redirect information. * @throws APIException */ @Override public void executeRedirect( Map<String, String> properties, List<Page> pages) throws APIException { try { Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS); // Manage redirects and missing pages updateRedirect(root, pages); } catch (JDOMException e) { log.error("Error loading redirects", e); throw new APIException("Error parsing XML", e); } } /** * Retrieve information about page title normalization. * * @param root Root element. * @param normalization Map containing information about title normalization (From => To). * @throws JDOMException */ public void retrieveNormalization( Element root, Map<String, String> normalization) throws JDOMException { if (normalization == null) { return; } XPathExpression<Element> xpaNormalized = XPathFactory.instance().compile( "/api/query/normalized/n", Filters.element()); List<Element> listNormalized = xpaNormalized.evaluate(root); if ((listNormalized == null) || (listNormalized.isEmpty())) { return; } Iterator<Element> itNormalized = listNormalized.iterator(); while (itNormalized.hasNext()) { Element normalized = itNormalized.next(); String from = normalized.getAttributeValue("from"); String to = normalized.getAttributeValue("to"); if ((from != null) && (to != null)) { normalization.put(from, to); } } } /** * Retrieve the normalized title of a page. * * @param title Title. * @param normalization Normalization information. * @return Normalized title. */ public String getNormalizedTitle(String title, Map<String, String> normalization) { if ((title == null) || (normalization == null)) { return title; } String tmp = normalization.get(title); if (tmp != null) { return tmp; } return title; } /** * Update redirect and missing information of a list of pages. * * @param root Root element. * @param pages List of pages. * @throws JDOMException */ public void updateRedirect(Element root, Collection<Page> pages) throws JDOMException { // Retrieving redirects XPathExpression<Element> xpaRedirects = XPathFactory.instance().compile( "/api/query/redirects/r", Filters.element()); List<Element> listRedirects = xpaRedirects.evaluate(root); // Retrieving pages XPathExpression<Element> xpaPages = XPathFactory.instance().compile( "/api/query/pages/page", Filters.element()); List<Element> listPages = xpaPages.evaluate(root); // Retrieving normalization information Map<String, String> normalization = new HashMap<String, String>(); retrieveNormalization(root, normalization); // Analyzing redirects Iterator<Element> itRedirect = listRedirects.iterator(); while (itRedirect.hasNext()) { Element currentRedirect = itRedirect.next(); String fromPage = currentRedirect.getAttributeValue("from"); String toPage = currentRedirect.getAttributeValue("to"); for (Page p : pages) { // Find if the redirect is already taken into account boolean exists = false; Iterator<Page> itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); if (Page.areSameTitle(title, toPage)) { exists = true; } } // Add the redirect if needed itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); if (!exists && Page.areSameTitle(title, fromPage)) { Element to = null; for (Element page : listPages) { if ((to == null) && Page.areSameTitle(toPage, page.getAttributeValue("title"))) { to = page; } } if (to != null) { Page pageTo = DataManager.getPage( p.getWikipedia(), to.getAttributeValue("title"), null, null, null); pageTo.setNamespace(to.getAttributeValue("ns")); pageTo.setPageId(to.getAttributeValue("pageid")); p.addRedirect(pageTo); } } } } } // Analyzing missing pages for (Page p : pages) { Iterator<Page> itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); Element page = null; for (Element tmpPage : listPages) { if ((page == null) && title.equals(tmpPage.getAttributeValue("title"))) { page = tmpPage; } } if (page != null) { if (page.getAttributeValue("pageid") != null) { tmp.setExisting(Boolean.TRUE); } else { Attribute attrMissing = page.getAttribute("missing"); if (attrMissing != null) { tmp.setExisting(Boolean.FALSE); } } } } } } }