/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.wikipediacleaner.api.check.CheckError;
import org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithm;
import org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithms;
import org.wikipediacleaner.api.constants.EnumWikipedia;
import org.wikipediacleaner.api.data.AutomaticFixing;
import org.wikipediacleaner.api.data.AutomaticFormatter;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.api.execution.AllLinksToPageCallable;
import org.wikipediacleaner.api.execution.ContentsCallable;
import org.wikipediacleaner.api.execution.DisambiguationStatusCallable;
import org.wikipediacleaner.api.execution.EmbeddedInCallable;
import org.wikipediacleaner.api.execution.ExpandTemplatesCallable;
import org.wikipediacleaner.api.execution.LinksWRCallable;
import org.wikipediacleaner.api.execution.ParseTextCallable;
import org.wikipediacleaner.api.execution.TemplatesCallable;
import org.wikipediacleaner.gui.swing.worker.UpdateDabWarningTools;
import org.wikipediacleaner.i18n.GT;
import org.wikipediacleaner.utils.Configuration;
import org.wikipediacleaner.utils.ConfigurationValueBoolean;
import org.wikipediacleaner.utils.ConfigurationValueInteger;
/**
* Centralization of access to MediaWiki.
*/
public class MediaWiki extends MediaWikiController {
/**
 * Create an access point to MediaWiki.
 *
 * @param listener Listener to MediaWiki events.
 * @return Access to MediaWiki.
 */
static public MediaWiki getMediaWikiAccess(MediaWikiListener listener) {
  return new MediaWiki(listener);
}
/**
 * Private constructor; instances are obtained through
 * {@link #getMediaWikiAccess(MediaWikiListener)}.
 *
 * @param listener Listener to MediaWiki events.
 */
private MediaWiki(MediaWikiListener listener) {
super(listener);
}
/**
 * Block until all tasks are finished.
 *
 * @param block True to wait until every remaining task has completed.
 * @throws APIException In case of problem while processing a task result.
 */
public void block(boolean block) throws APIException {
  if (!block) {
    // Not waiting: only honor a pending stop request.
    if (shouldStop()) {
      stopRemainingTasks();
    }
    return;
  }
  while (hasRemainingTask() && !shouldStop()) {
    getNextResult();
  }
  if (shouldStop()) {
    stopRemainingTasks();
  }
}
/**
 * Retrieve the contents of a single page.
 *
 * @param wikipedia Wikipedia.
 * @param page Page.
 * @param block Flag indicating if the call should block until completed.
 * @param returnPage Flag indicating if the page should be returned once task is finished.
 * @param usePageId True if page identifiers should be used.
 * @param withRedirects Flag indicating if redirects information should be retrieved.
 * @param doAnalysis True if page analysis should be done.
 * @throws APIException In case of problem.
 */
public void retrieveContents(
    EnumWikipedia wikipedia, Page page,
    boolean block, boolean returnPage,
    boolean usePageId, boolean withRedirects,
    boolean doAnalysis) throws APIException {
  if (page == null) {
    return;
  }
  // Return the page itself only if the caller asked for it.
  Page resultPage = returnPage ? page : null;
  addTask(new ContentsCallable(
      wikipedia, this, APIFactory.getAPI(),
      page, resultPage,
      usePageId, withRedirects, null,
      doAnalysis));
  block(block);
}
/**
 * Retrieve the contents of a collection of pages.
 *
 * @param wikipedia Wikipedia.
 * @param pages Pages.
 * @param block Flag indicating if the call should block until completed.
 * @param usePageId True if page identifiers should be used.
 * @param withRedirects Flag indicating if redirects information should be retrieved.
 * @param doAnalysis True if page analysis should be done.
 * @throws APIException In case of problem.
 */
public void retrieveContents(
    EnumWikipedia wikipedia, Collection<Page> pages,
    boolean block, boolean usePageId, boolean withRedirects,
    boolean doAnalysis) throws APIException {
  if (pages == null) {
    return;
  }
  final API api = APIFactory.getAPI();
  // Queue one retrieval task per page.
  for (Page currentPage : pages) {
    addTask(new ContentsCallable(
        wikipedia, this, api,
        currentPage, null,
        usePageId, withRedirects, null,
        doAnalysis));
  }
  block(block);
}
/**
 * Retrieve the contents of one section for a collection of pages.
 *
 * @param wikipedia Wikipedia.
 * @param pages Pages.
 * @param section Section number.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveSectionContents(
    EnumWikipedia wikipedia, Collection<Page> pages,
    int section, boolean block) throws APIException {
  if (pages == null) {
    return;
  }
  final API api = APIFactory.getAPI();
  final Integer sectionNumber = Integer.valueOf(section);
  for (Page currentPage : pages) {
    addTask(new ContentsCallable(
        wikipedia, this, api,
        currentPage, null,
        false, false, sectionNumber,
        false));
  }
  block(block);
}
/**
 * Replace text in a list of pages.
 *
 * @param pages List of pages.
 * @param replacements List of text replacements
 *        Key: Additional comments used for the modification.
 *        Value: Text replacements.
 * @param wiki Wiki.
 * @param comment Comment used for the modification.
 * @param description (Out) description of changes made, as an HTML fragment.
 * @param automaticCW True if automatic Check Wiki fixing should be done also.
 * @param forceCW True if Check Wiki fixing should be done even if no automatic replacement was done.
 * @param save True if modification should be saved.
 * @param updateDabWarning True if disambiguation warnings should be updated after saving.
 * @return Count of modified pages.
 * @throws APIException In case of problem.
 */
public int replaceText(
    Page[] pages, Map<String, List<AutomaticFixing>> replacements,
    EnumWikipedia wiki, String comment,
    StringBuilder description,
    boolean automaticCW, boolean forceCW, boolean save,
    boolean updateDabWarning) throws APIException {
  if ((pages == null) || (replacements == null) || (replacements.size() == 0)) {
    return 0;
  }

  // Initialize page loading: pre-fetch one page per interrogation thread.
  Configuration config = Configuration.getConfiguration();
  int nThreads = Math.max(
      config.getInt(null, ConfigurationValueInteger.INTERROG_THREAD), 1);
  int currentPage = 0;
  while ((currentPage < nThreads) && (currentPage < pages.length)) {
    retrieveContents(wiki, pages[currentPage], false, true, false, true, false); // TODO: withRedirects=false ?
    pages[currentPage] = null; // To release memory
    currentPage++;
  }

  // Analyze pages as their contents become available.
  boolean secured = config.getBoolean(null, ConfigurationValueBoolean.SECURE_URL);
  UpdateDabWarningTools dabWarnings = new UpdateDabWarningTools(wiki, null, false, false);
  int count = 0;
  final API api = APIFactory.getAPI();
  StringBuilder details = new StringBuilder();
  StringBuilder fullComment = new StringBuilder();
  StringBuilder tmpDescription = (description != null) ? new StringBuilder() : null;
  while (hasRemainingTask() && !shouldStop()) {
    Object result = getNextResult();

    // Keep the loading pipeline full: queue the next page as soon as a result arrives.
    if (currentPage < pages.length) {
      retrieveContents(wiki, pages[currentPage], false, true, false, true, false); // TODO: withRedirects=false ?
      pages[currentPage] = null; // To release memory
      currentPage++;
    }
    if ((result != null) && (result instanceof Page)) {
      List<String> replacementsDone = new ArrayList<String>();
      Page page = (Page) result;
      String oldContents = page.getContents();
      if (oldContents != null) {
        String newContents = oldContents;
        details.setLength(0);
        fullComment.setLength(0);
        if (tmpDescription != null) {
          tmpDescription.setLength(0);
          String title =
              "<a href=\"" + wiki.getSettings().getURL(page.getTitle(), false, secured) + "\">" +
              page.getTitle() + "</a>";
          tmpDescription.append(GT._("Page {0}:", title));
          tmpDescription.append("\n");
          tmpDescription.append("<ul>\n");
        }

        // Apply automatic fixing
        for (Entry<String, List<AutomaticFixing>> replacement : replacements.entrySet()) {
          replacementsDone.clear();
          String tmpContents = AutomaticFixing.apply(replacement.getValue(), newContents, replacementsDone);
          if (!newContents.equals(tmpContents)) {
            newContents = tmpContents;

            // Update description
            if (tmpDescription != null) {
              for (String replacementDone : replacementsDone) {
                tmpDescription.append("<li>");
                // Escape HTML special characters so the replacement text renders
                // correctly inside the HTML description. The previous code
                // (replaceAll("\\&", "&").replaceAll("\\<", "<")) replaced each
                // character with itself and was a no-op.
                tmpDescription.append(replacementDone.replace("&", "&amp;").replace("<", "&lt;"));
                tmpDescription.append("</li>\n");
              }
            }

            // Memorize replacement key for the edit comment
            if ((replacement.getKey() != null) && (replacement.getKey().length() > 0)) {
              if (details.length() > 0) {
                details.append(", ");
              }
              details.append(replacement.getKey());
            }
          }
        }
        fullComment.append(wiki.createUpdatePageComment(comment, details.toString()));

        // Apply automatic Check Wiki fixing if needed
        if (automaticCW) {
          if (!oldContents.equals(newContents) || forceCW) {
            String tmpContents = newContents;
            List<CheckErrorAlgorithm> algorithms = CheckErrorAlgorithms.getAlgorithms(wiki);
            List<CheckError.Progress> usedAlgorithms = new ArrayList<>();
            newContents = AutomaticFormatter.tidyArticle(
                page, newContents, algorithms, false, usedAlgorithms);
            if (!usedAlgorithms.isEmpty()) {
              fullComment.append(" / ");
              fullComment.append(wiki.getCWConfiguration().getComment(usedAlgorithms));
              if (tmpDescription != null) {
                for (CheckError.Progress progress : usedAlgorithms) {
                  CheckErrorAlgorithm algorithm = progress.algorithm;
                  tmpDescription.append("<li>");
                  tmpDescription.append(algorithm.getShortDescriptionReplaced());
                  tmpDescription.append("</li>\n");
                }
              }
            } else if (oldContents.equals(tmpContents)) {
              // If no automatic modifications were done before CW,
              // don't keep the modifications for general tidiness
              newContents = tmpContents;
            }
          }
        }

        // Page contents has been modified
        if (!oldContents.equals(newContents)) {
          if (tmpDescription != null) {
            tmpDescription.append("</ul>\n");
            if (description != null) {
              description.append(tmpDescription);
            }
          }

          // Save page
          setText(GT._("Updating page {0}", page.getTitle()));
          count++;
          if (save) {
            api.updatePage(wiki, page, newContents, fullComment.toString(), false, false);
            if (updateDabWarning) {
              List<Page> tmpList = new ArrayList<>(1);
              tmpList.add(page);
              dabWarnings.updateWarning(tmpList, null, null, null);
            }
          }
        }
      }
    }
  }
  block(true);
  return count;
}
/**
 * Expand templates in a text.
 *
 * @param wikipedia Wikipedia.
 * @param title Title of the page.
 * @param text Text of the page.
 * @return Text with templates expanded, or null if no result was produced.
 * @throws APIException In case of problem.
 */
public String expandTemplates(EnumWikipedia wikipedia, String title, String text) throws APIException {
  if (text == null) {
    return null;
  }
  addTask(new ExpandTemplatesCallable(
      wikipedia, this, APIFactory.getAPI(), title, text));
  // Return the first non-null result produced by the task.
  while (hasRemainingTask() && !shouldStop()) {
    Object result = getNextResult();
    if (result != null) {
      return result.toString();
    }
  }
  block(true);
  return null;
}
/**
 * Parse a complete text.
 *
 * @param wikipedia Wikipedia.
 * @param title Title of the page.
 * @param text Text of the page.
 * @return Parsed text, or null if no result was produced.
 * @throws APIException In case of problem.
 */
public String parseText(
    EnumWikipedia wikipedia,
    String title, String text) throws APIException {
  if (text == null) {
    return null;
  }
  addTask(new ParseTextCallable(
      wikipedia, this, APIFactory.getAPI(), title, text));
  // Return the first non-null result produced by the task.
  while (hasRemainingTask() && !shouldStop()) {
    Object result = getNextResult();
    if (result != null) {
      return result.toString();
    }
  }
  block(true);
  return null;
}
/**
 * Retrieve similar pages of a page.
 *
 * @param wikipedia Wikipedia.
 * @param page Page.
 * @throws APIException In case of problem.
 */
public void retrieveSimilarPages(
    EnumWikipedia wikipedia,
    Page page) throws APIException {
  if (page != null) {
    // Direct synchronous API call, no task queuing.
    APIFactory.getAPI().retrieveSimilarPages(wikipedia, page, true);
  }
}
/**
 * Retrieve all links (with redirects) of a page.
 *
 * @param wikipedia Wikipedia.
 * @param page Page.
 * @param namespace If set, retrieve only links in this namespace.
 * @param knownPages Already known pages.
 * @param disambigNeeded True if disambiguation information is needed.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveAllLinks(
    EnumWikipedia wikipedia,
    Page page, Integer namespace,
    List<Page> knownPages,
    boolean disambigNeeded,
    boolean block) throws APIException {
  if (page != null) {
    addTask(new LinksWRCallable(
        wikipedia, this, APIFactory.getAPI(),
        page, namespace, knownPages, disambigNeeded));
    block(block);
  }
}
/**
 * Retrieve all templates of a page.
 *
 * @param wikipedia Wikipedia.
 * @param page Page.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveAllTemplates(
    EnumWikipedia wikipedia,
    Page page,
    boolean block) throws APIException {
  if (page != null) {
    addTask(new TemplatesCallable(
        wikipedia, this, APIFactory.getAPI(), page));
    block(block);
  }
}
/**
 * Retrieve all links to a page (with redirects).
 *
 * @param wikipedia Wikipedia.
 * @param page Page.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveAllLinksToPage(
    EnumWikipedia wikipedia,
    Page page, boolean block) throws APIException {
  if (page != null) {
    // Delegate to the multi-page variant with a one-element collection.
    retrieveAllLinksToPages(wikipedia, Collections.singleton(page), block);
  }
}
/**
 * Retrieve all links to a list of pages (with redirects).
 *
 * @param wikipedia Wikipedia.
 * @param pageList List of pages.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveAllLinksToPages(
    EnumWikipedia wikipedia,
    Collection<Page> pageList, boolean block) throws APIException {
  if ((pageList == null) || pageList.isEmpty()) {
    return;
  }
  final API api = APIFactory.getAPI();
  for (final Page currentPage : pageList) {
    addTask(new AllLinksToPageCallable(wikipedia, this, api, currentPage));
  }
  block(block);
}
/**
 * Retrieve all pages it is embedded in of a list of pages.
 *
 * @param wikipedia Wikipedia.
 * @param pageList List of pages.
 * @param namespaces List of name spaces to look into.
 * @param limit Flag indicating if the number of results should be limited.
 * @return Sorted list of embedding pages without title duplicates, or null for an empty request.
 * @throws APIException In case of problem.
 */
@SuppressWarnings("unchecked")
public List<Page> retrieveAllEmbeddedIn(
    EnumWikipedia wikipedia, List<Page> pageList,
    List<Integer> namespaces,
    boolean limit) throws APIException {
  if ((pageList == null) || pageList.isEmpty()) {
    return null;
  }
  final API api = APIFactory.getAPI();
  for (final Page currentPage : pageList) {
    addTask(new EmbeddedInCallable(wikipedia, this, api, currentPage, namespaces, limit));
  }

  // Gather the results of every task.
  List<Page> resultList = new ArrayList<Page>();
  while (hasRemainingTask() && !shouldStop()) {
    Object result = getNextResult();
    if (result instanceof List<?>) {
      resultList.addAll((List<Page>) result);
    }
  }

  // Sort, then drop consecutive entries sharing the same title.
  Collections.sort(resultList);
  Page previousPage = null;
  for (Iterator<Page> itPage = resultList.iterator(); itPage.hasNext();) {
    Page currentPage = itPage.next();
    if ((previousPage != null) &&
        Page.areSameTitle(previousPage.getTitle(), currentPage.getTitle())) {
      itPage.remove();
    } else {
      previousPage = currentPage;
    }
  }
  return resultList;
}
/**
 * Retrieve disambiguation information for a list of pages.
 *
 * @param wikipedia Wikipedia.
 * @param pageList List of page.
 * @param knownPages Already known pages.
 * @param disambiguations Flag indicating if possible disambiguations should be retrieved.
 * @param forceApiCall True if API call should be forced even if the list of disambiguation pages is loaded.
 * @param block Flag indicating if the call should block until completed.
 * @throws APIException In case of problem.
 */
public void retrieveDisambiguationInformation(
    EnumWikipedia wikipedia,
    List<Page> pageList, List<Page> knownPages,
    boolean disambiguations, boolean forceApiCall, boolean block)
    throws APIException {
  if ((pageList == null) || pageList.isEmpty()) {
    return;
  }
  final API api = APIFactory.getAPI();

  // Retrieve disambiguation status, splitting the request into
  // chunks of at most maxPages pages.
  final int maxPages = api.getMaxPagesPerQuery();
  List<Page> filteredList = pageList;
  if (knownPages != null) {
    filteredList = new ArrayList<Page>(pageList);
    filteredList.removeAll(knownPages);
  }
  if (filteredList.size() <= maxPages) {
    addTask(new DisambiguationStatusCallable(wikipedia, this, api, filteredList, forceApiCall));
  } else {
    int index = 0;
    while (index < filteredList.size()) {
      List<Page> chunk = new ArrayList<Page>(maxPages);
      while ((chunk.size() < maxPages) && (index < filteredList.size())) {
        chunk.add(filteredList.get(index));
        index++;
      }
      addTask(new DisambiguationStatusCallable(wikipedia, this, api, chunk, forceApiCall));
    }
  }
  block(true);

  // Retrieve possible disambiguations: load links of every
  // disambiguation page (direct or reached through a redirect)
  // whose links are not already known.
  if (disambiguations) {
    for (Page listedPage : pageList) {
      Iterator<Page> iter = listedPage.getRedirectIteratorWithPage();
      while (iter.hasNext()) {
        Page currentPage = iter.next();
        if (Boolean.TRUE.equals(currentPage.isDisambiguationPage()) &&
            !currentPage.isRedirect()) {
          List<Page> links = currentPage.getLinks();
          if ((links == null) || links.isEmpty()) {
            addTask(new LinksWRCallable(wikipedia, this, api, currentPage, null, null, false));
          }
        }
      }
    }
  }
  block(block);
}
}