/*
 *  WPCleaner: A tool to help on Wikipedia maintenance tasks.
 *  Copyright (C) 2013 Nicolas Vervelle
 *
 *  See README.txt file for licensing information.
 */

package org.wikipediacleaner.gui.swing.pagelist;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import javax.swing.JOptionPane;

import org.wikipediacleaner.api.API;
import org.wikipediacleaner.api.APIException;
import org.wikipediacleaner.api.APIFactory;
import org.wikipediacleaner.api.MediaWiki;
import org.wikipediacleaner.api.constants.EnumQueryPage;
import org.wikipediacleaner.api.constants.EnumWikipedia;
import org.wikipediacleaner.api.constants.WPCConfiguration;
import org.wikipediacleaner.api.constants.WPCConfigurationString;
import org.wikipediacleaner.api.data.DataManager;
import org.wikipediacleaner.api.data.Namespace;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.api.data.PageComparator;
import org.wikipediacleaner.gui.swing.basic.BasicWindow;
import org.wikipediacleaner.gui.swing.basic.BasicWorker;
import org.wikipediacleaner.gui.swing.basic.Utilities;
import org.wikipediacleaner.i18n.GT;
import org.wikipediacleaner.utils.Configuration;


/**
 * SwingWorker for getting various lists of pages.
 * A commented usage sketch is provided at the end of this file.
 */
public class PageListWorker extends BasicWorker {

  /**
   * The <code>Mode</code> specifies how the PageListWorker
   * uses the list of elements provided to it:
   * <ul>
   * <li>ALL_DAB_PAGES (list not used):
   *     Retrieve the list of all disambiguation pages.</li>
   * <li>BACKLINKS (list of pages):
   *     Retrieve the list of backlinks.</li>
   * <li>CATEGORY_MEMBERS (list of categories):
   *     Retrieve the list of articles in the categories.</li>
   * <li>CATEGORY_MEMBERS_ARTICLES (list of categories):
   *     Retrieve the list of articles in the categories.
   *     If talk pages are found, the related article is used instead.</li>
   * <li>DAB_WATCH (list of disambiguation pages):
   *     Retrieve the list of pages linking to the disambiguation pages and needing attention.</li>
   * <li>DIRECT (direct list of pages).</li>
   * <li>EMBEDDED_IN (list of templates):
   *     Retrieve the list of pages embedding the templates.</li>
   * <li>INTERNAL_LINKS_MAIN (list of pages):
   *     Retrieve the list of internal links in the pages, keeping only the main namespace.</li>
   * <li>INTERNAL_LINKS_TALKPAGES_CONVERTED (list of pages):
   *     Retrieve the list of internal links in the pages, converting talk pages to their articles.</li>
   * <li>MISSING_TEMPLATES (list not used):
   *     Retrieve the list of pages with missing templates.</li>
   * <li>PROTECTED_TITLES (list not used):
   *     Retrieve the list of protected titles with links from articles.</li>
   * <li>QUERY_PAGE (code of the special list to retrieve):
   *     Retrieve the list of pages of a special list.</li>
   * <li>SEARCH_TITLES (list of keywords):
   *     Retrieve the list of pages matching the keywords.</li>
   * <li>WATCH_LIST (list not used):
   *     Retrieve the list of pages in the watch list.</li>
   * </ul>
   */
  public static enum Mode {
    ALL_DAB_PAGES,
    BACKLINKS,
    CATEGORY_MEMBERS,
    CATEGORY_MEMBERS_ARTICLES,
    DAB_WATCH,
    DIRECT,
    EMBEDDED_IN,
    INTERNAL_LINKS_MAIN(GT._("Articles in main namespace")),
    INTERNAL_LINKS_TALKPAGES_CONVERTED(GT._("Articles associated to talk pages")),
    MISSING_TEMPLATES(GT._("Pages with missing templates")),
    PROTECTED_TITLES(GT._("Protected titles with links from articles")),
    QUERY_PAGE,
    SEARCH_TITLES,
    WATCH_LIST;

    /**
     * Optional title for the associated list.
     */
    final private String title;

    /**
     * Constructor.
     */
    private Mode() {
      this(null);
    }

    /**
     * @param title Title for the associated list.
     */
    private Mode(String title) {
      this.title = title;
    }

    /**
     * @return Title for the associated list.
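     *         May be null for modes declared without an explicit title.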
     */
    public String getTitle() {
      return title;
    }
  }

  private final Page referencePage;
  private final List<String> elementNames;
  private final Mode mode;
  private final boolean watchList;
  private final List<Page> pageList;
  private final String message;

  /**
   * @param wikipedia Wikipedia.
   * @param window Window.
   * @param referencePage Reference page (may be null).
   * @param elementNames List of elements (page names, ...).
   * @param mode Mode for determining the list of pages.
   * @param watchList True if the resulting list should be handled as a watch list.
   * @param message Window title.
   */
  public PageListWorker(
      EnumWikipedia wikipedia, BasicWindow window,
      Page referencePage,
      List<String> elementNames, Mode mode,
      boolean watchList, String message) {
    super(wikipedia, window);
    this.referencePage = referencePage;
    this.pageList = new ArrayList<Page>();
    this.elementNames = elementNames;
    this.mode = mode;
    this.watchList = watchList;
    this.message = message;
  }

  /* (non-Javadoc)
   * @see org.wikipediacleaner.gui.swing.utils.SwingWorker#finished()
   */
  @Override
  public void finished() {
    super.finished();
    Object result = get();
    if (!(result instanceof Throwable)) {
      if (mode == Mode.ALL_DAB_PAGES) {
        Set<String> set = new HashSet<String>(pageList.size());
        for (Page page : pageList) {
          set.add(page.getTitle());
        }
        getWikipedia().setDisambiguationPages(set);
        int answer = Utilities.displayYesNoWarning(
            (getWindow() != null) ? getWindow().getParentComponent() : null,
            GT._(
                "You have loaded the list of all disambiguation pages to speed up page analysis.\n" +
                "Do you want to display the list of all disambiguation pages ?"));
        if (answer != JOptionPane.YES_OPTION) {
          return;
        }
      }
      PageListWindow.createPageListWindow(
          message, referencePage, pageList, getWikipedia(), watchList);
    }
  }

  /* (non-Javadoc)
   * @see org.wikipediacleaner.gui.swing.utils.SwingWorker#construct()
   */
  @Override
  public Object construct() {
    try {
      List<Page> pages = new ArrayList<Page>();
      boolean retrieveDisambiguationInformation = true;
      switch (mode) {

      // List all disambiguation pages
      case ALL_DAB_PAGES:
        constructAllDab(pages);
        retrieveDisambiguationInformation = false;
        break;

      // List of page backlinks
      case BACKLINKS:
        constructBackLinks(pages);
        break;

      // List members of a category
      case CATEGORY_MEMBERS:
        constructCategoryMembers(pages);
        break;

      // List article members of a category
      case CATEGORY_MEMBERS_ARTICLES:
        constructCategoryMembersArticles(pages);
        break;

      // List pages with disambiguation links requiring attention
      case DAB_WATCH:
        constructDabWatch(pages);
        break;

      // List pages embedding a template
      case EMBEDDED_IN:
        constructEmbeddedIn(pages);
        break;

      // List internal links (main namespace) in pages
      case INTERNAL_LINKS_MAIN:
        constructInternalLinks(pages, false);
        break;

      // List internal links in pages, converting talk pages to their articles
      case INTERNAL_LINKS_TALKPAGES_CONVERTED:
        constructInternalLinks(pages, true);
        break;

      // Retrieve list of pages with missing templates
      case MISSING_TEMPLATES:
        constructMissingTemplates(pages);
        break;

      // Retrieve list of protected titles with backlinks
      case PROTECTED_TITLES:
        constructProtectedTitles(pages);
        break;

      // Retrieve a special list
      case QUERY_PAGE:
        constructQueryPage(pages);
        break;

      // Search similar pages
      case SEARCH_TITLES:
        constructSearchTitles(pages);
        break;

      // List pages in the watch list
      case WATCH_LIST:
        constructWatchList(pages);
        break;

      default:
        pages.addAll(constructInternalPageList());
        break;
      }

      if (retrieveDisambiguationInformation) {
        MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
        List<Page> tmpPages = new ArrayList<Page>();
        for (Page tmpPage : pages) {
          if (tmpPage.isDisambiguationPage() == null) {
            tmpPages.add(tmpPage);
          }
        }
        if (!tmpPages.isEmpty()) {
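          // Retrieve the disambiguation status only for the pages
          // where it is still unknown (filtered just above).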
          mw.retrieveDisambiguationInformation(
              getWikipedia(), tmpPages, null, false, true, true);
        }
      }

      if (!shouldContinue()) {
        return null;
      }
      pageList.addAll(pages);
    } catch (APIException e) {
      return e;
    }
    return null;
  }

  /**
   * Construct the list of pages from the list of page names.
   * 
   * @return Internal list of pages.
   */
  private List<Page> constructInternalPageList() {
    if (elementNames == null) {
      return new ArrayList<Page>();
    }
    List<Page> pages = new ArrayList<Page>(elementNames.size());
    for (String pageName : elementNames) {
      pages.add(DataManager.getPage(getWikipedia(), pageName, null, null, null));
    }
    return pages;
  }

  /**
   * Construct list of all disambiguation pages.
   * 
   * @param pages List of all disambiguation pages.
   * @throws APIException
   */
  private void constructAllDab(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    EnumWikipedia wiki = getWikipedia();
    List<Page> tmpPages = wiki.constuctDisambiguationPages(api);
    if (tmpPages != null) {
      pages.addAll(tmpPages);
    }
  }

  /**
   * Construct list of backlinks.
   * 
   * @param pages List of backlinks.
   * @throws APIException
   */
  private void constructBackLinks(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    for (String pageName : elementNames) {
      Page page = DataManager.getPage(getWikipedia(), pageName, null, null, null);
      api.retrieveLinksHere(getWikipedia(), page, true);
      List<Page> tmpPages = page.getRelatedPages(Page.RelatedPages.LINKS_HERE);
      if (tmpPages != null) {
        for (Page tmpPage : tmpPages) {
          if (!pages.contains(tmpPage)) {
            pages.add(tmpPage);
          }
        }
      }
    }
  }

  /**
   * Construct list of pages that are members of the categories.
   * 
   * @param pages List of pages that are members of the categories.
   * @throws APIException
   */
  private void constructCategoryMembers(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    for (String pageName : elementNames) {
      Page page = DataManager.getPage(getWikipedia(), pageName, null, null, null);
      api.retrieveCategoryMembers(getWikipedia(), page, 0, true, Integer.MAX_VALUE);
      List<Page> tmpPages = page.getRelatedPages(Page.RelatedPages.CATEGORY_MEMBERS);
      if (tmpPages != null) {
        for (Page tmpPage : tmpPages) {
          if (!pages.contains(tmpPage)) {
            pages.add(tmpPage);
          }
        }
      }
    }
  }

  /**
   * Construct list of articles that are members of the categories.
   * 
   * @param pages List of articles that are members of the categories.
   * @throws APIException
   */
  private void constructCategoryMembersArticles(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    for (String pageName : elementNames) {
      Page page = DataManager.getPage(getWikipedia(), pageName, null, null, null);
      api.retrieveCategoryMembers(getWikipedia(), page, 0, true, Integer.MAX_VALUE);
      List<Page> tmpPages = page.getRelatedPages(Page.RelatedPages.CATEGORY_MEMBERS);
      if (tmpPages != null) {
        WPCConfiguration configuration = getWikipedia().getConfiguration();
        for (Page tmpPage : tmpPages) {
          if (!tmpPage.isArticle()) {
            String title = tmpPage.getArticlePageName();
            String todoSubpage = configuration.getString(WPCConfigurationString.TODO_SUBPAGE);
            if ((todoSubpage != null) &&
                (todoSubpage.trim().length() > 0) &&
                (title.endsWith("/" + todoSubpage))) {
              title = title.substring(0, title.length() - 1 - todoSubpage.length());
            }
            tmpPage = DataManager.getPage(getWikipedia(), title, null, null, null);
          }
          if (!pages.contains(tmpPage)) {
            pages.add(tmpPage);
          }
        }
      }
    }
  }

  /**
   * Construct list of pages with disambiguation links requiring attention.
   * 
   * @param pages List of pages with disambiguation links requiring attention.
   * @throws APIException
   */
  private void constructDabWatch(List<Page> pages) throws APIException {
    if (elementNames != null) {
      List<Page> tmpPages = constructInternalPageList();
      MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
      mw.retrieveAllLinksToPages(getWikipedia(), tmpPages, true);
      Configuration configuration = Configuration.getConfiguration();
      for (Page tmpPage : tmpPages) {
        List<Page> backlinks = tmpPage.getAllLinksToPage();
        if (backlinks != null) {
          Properties pageProperties = configuration.getSubProperties(
              getWikipedia(), Configuration.PROPERTIES_BACKLINKS, tmpPage.getTitle());
          for (Page page : backlinks) {
            if ((pageProperties == null) ||
                (!pageProperties.containsKey(page.getTitle()))) {
              Integer namespace = page.getNamespace();
              if ((namespace != null) &&
                  ((namespace.intValue() == Namespace.MAIN) ||
                   (namespace.intValue() == Namespace.TEMPLATE))) {
                if (!pages.contains(page)) {
                  pages.add(page);
                }
              }
            }
          }
        }
      }
    }
  }

  /**
   * Construct list of pages embedding the templates.
   * 
   * @param pages List of pages embedding the templates.
   * @throws APIException
   */
  private void constructEmbeddedIn(List<Page> pages) throws APIException {
    if (elementNames != null) {
      List<Page> tmpPages = constructInternalPageList();
      MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
      pages.addAll(mw.retrieveAllEmbeddedIn(
          getWikipedia(), tmpPages, null,
          !getWikipedia().getConnection().getUser().isMemberOf("bot")));
    }
  }

  /**
   * Construct list of internal links contained in the pages.
   * 
   * @param pages List of internal links in the pages.
   * @param convertTalkPages True if talk pages should be converted to their respective articles.
   * @throws APIException
   */
  private void constructInternalLinks(
      List<Page> pages, boolean convertTalkPages) throws APIException {
    for (String dabList : elementNames) {
      Page page = DataManager.getPage(getWikipedia(), dabList, null, null, null);
      MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
      mw.retrieveAllLinks(getWikipedia(), page, null, null, true, true);
      Iterator<Page> iter = page.getLinks().iterator();
      while (iter.hasNext()) {
        Page link = iter.next();
        if (link != null) {
          if (convertTalkPages && !link.isArticle()) {
            link = link.getArticlePage();
          }
          if ((link.isInMainNamespace()) && (!pages.contains(link))) {
            pages.add(link);
          }
        }
      }
    }
  }

  /**
   * Construct list of pages with missing templates.
   * 
   * @param pages List of pages with missing templates.
   * @throws APIException
   */
  private void constructMissingTemplates(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    EnumWikipedia wiki = getWikipedia();
    setText(GT._("Retrieving list of missing templates"));
    List<Page> tmpPages = api.getQueryPages(wiki, EnumQueryPage.WANTED_TEMPLATES);
    if (tmpPages == null) {
      return;
    }
    setText(GT._("Checking that the templates are still missing"));
    api.retrieveInfo(wiki, tmpPages);
    List<Page> tmpPages2 = new ArrayList<Page>();
    for (Page tmpPage : tmpPages) {
      Boolean exists = tmpPage.isExisting();
      if (!Boolean.TRUE.equals(exists)) {
        tmpPages2.add(tmpPage);
      }
    }
    MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
    List<Page> tmpPages3 = mw.retrieveAllEmbeddedIn(
        wiki, tmpPages2,
        wiki.getConfiguration().getEncyclopedicNamespaces(),
        true);
    pages.addAll(tmpPages3);
    Collections.sort(pages, PageComparator.getNamespaceFirstComparator());
  }

  /**
   * Construct list of protected titles with backlinks.
   * 
   * @param pages List of protected titles with backlinks.
   * @throws APIException
   */
  private void constructProtectedTitles(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    EnumWikipedia wiki = getWikipedia();
    setText(GT._("Retrieving list of protected titles"));
    List<Page> tmpPages = api.getProtectedTitles(
        wiki, Collections.singletonList(Namespace.MAIN), false);
    if ((tmpPages == null) || (tmpPages.isEmpty())) {
      return;
    }
    setText(GT._("Checking that protected titles have backlinks"));
    MediaWiki mw = MediaWiki.getMediaWikiAccess(this);
    mw.retrieveAllLinksToPages(wiki, tmpPages, true);
    for (Page page : tmpPages) {
      Integer backlinks = page.getBacklinksCountInMainNamespace();
      if ((backlinks != null) && (backlinks.intValue() > 0)) {
        pages.add(page);
      }
    }
    Collections.sort(pages, PageComparator.getNamespaceFirstComparator());
  }

  /**
   * Construct a special list of pages.
   * 
   * @param pages List of pages.
   * @throws APIException
   */
  private void constructQueryPage(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    EnumWikipedia wiki = getWikipedia();
    EnumQueryPage query = EnumQueryPage.findByCode(elementNames.get(0));
    List<Page> tmpPages = api.getQueryPages(wiki, query);
    if (tmpPages != null) {
      pages.addAll(tmpPages);
    }
  }

  /**
   * Construct list of search results.
   * 
   * @param pages List of search results.
   * @throws APIException
   */
  private void constructSearchTitles(List<Page> pages) throws APIException {
    if (elementNames != null) {
      final API api = APIFactory.getAPI();
      for (String pageName : elementNames) {
        Page page = DataManager.getPage(getWikipedia(), pageName, null, null, null);
        api.retrieveSimilarPages(getWikipedia(), page, true);
        pages.addAll(page.getRelatedPages(Page.RelatedPages.SIMILAR_PAGES));
      }
    }
  }

  /**
   * Construct list of pages in the watch list.
   * 
   * @param pages List of pages in the watch list.
   * @throws APIException
   */
  private void constructWatchList(List<Page> pages) throws APIException {
    final API api = APIFactory.getAPI();
    List<Page> tmpPages = api.retrieveRawWatchlist(getWikipedia());
    if (tmpPages != null) {
      pages.addAll(tmpPages);
    }
  }
}
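
/*
 * Usage sketch (illustrative only, not part of the class).
 * It assumes BasicWorker exposes the usual start() method used by the other
 * WPCleaner workers to run construct() in the background and finished() on
 * the event dispatch thread; the wiki, window and category name below are
 * placeholders.
 *
 *   List<String> categories = Collections.singletonList("Category:Example");
 *   PageListWorker worker = new PageListWorker(
 *       wiki, window, null, categories,
 *       PageListWorker.Mode.CATEGORY_MEMBERS, false,
 *       GT._("Category members"));
 *   worker.start();
 */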