/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.gui.swing.worker; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.wikipediacleaner.api.API; import org.wikipediacleaner.api.APIException; import org.wikipediacleaner.api.APIFactory; import org.wikipediacleaner.api.constants.EnumWikipedia; import org.wikipediacleaner.api.data.DataManager; import org.wikipediacleaner.api.data.Namespace; import org.wikipediacleaner.api.data.Page; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.api.data.PageElementCategory; import org.wikipediacleaner.api.data.PageElementInternalLink; import org.wikipediacleaner.api.data.PageElementTemplate; import org.wikipediacleaner.gui.swing.basic.BasicWindow; import org.wikipediacleaner.gui.swing.basic.BasicWorker; import org.wikipediacleaner.i18n.GT; import org.wikipediacleaner.utils.Configuration; import org.wikipediacleaner.utils.ConfigurationValueBoolean; /** * SwingWorker for translating a page. */ public class TranslateWorker extends BasicWorker { private final EnumWikipedia from; private final Page page; private final String initialText; /** * @param wikipedia Wikipedia. * @param window Window. * @param from Original Wikipedia. * @param page Page. * @param text Page contents. */ public TranslateWorker( EnumWikipedia wikipedia, BasicWindow window, EnumWikipedia from, Page page, String text) { super(wikipedia, window); this.from = from; this.page = page; this.initialText = text; } /** * @return Translated text. * @see org.wikipediacleaner.gui.swing.basic.BasicWorker#construct() */ @Override public Object construct() { String text = initialText; try { Configuration config = Configuration.getConfiguration(); text = translateInternalLinks( text, config.getBoolean(null, ConfigurationValueBoolean.TRANSLATION_INTERNAL_LINK_TEXT), config.getBoolean(null, ConfigurationValueBoolean.TRANSLATION_INTERLANGUAGE)); text = translateCategories( text, config.getBoolean(null, ConfigurationValueBoolean.TRANSLATION_CATEGORY)); text = translateTemplates( text, config.getBoolean(null, ConfigurationValueBoolean.TRANSLATION_TEMPLATE_NAME), config.getBoolean(null, ConfigurationValueBoolean.TRANSLATION_TEMPLATE_NO_PARAM)); } catch (APIException e) { return null; } return text; } /** * @param text Text to translate. * @param translateText Flag indicating if internal link text should be translated. * @param useInterLanguage Flag indicating if interlanguage links can be used. * @return Text with internal links translated. * @throws APIException */ private String translateInternalLinks( String text, boolean translateText, boolean useInterLanguage) throws APIException { PageAnalysis analysis = page.getAnalysis(text, true); Collection<PageElementInternalLink> links = analysis.getInternalLinks(); Map<String, String> interwikis = new HashMap<String, String>(); StringBuilder newText = new StringBuilder(); int lastPosition = 0; for (PageElementInternalLink link : links) { String linkPage = link.getLink(); setText(GT._("Retrieving interwiki for {0}", linkPage)); String translated = null; if (!interwikis.containsKey(linkPage)) { translated = getLanguageLink(linkPage); interwikis.put(linkPage, translated); } else { translated = interwikis.get(linkPage); } if (translated != null) { if (!Page.areSameTitle(linkPage, translated)) { if (link.getBeginIndex() > lastPosition) { newText.append(text.substring(lastPosition, link.getBeginIndex())); lastPosition = link.getBeginIndex(); } newText.append("[["); if (translateText && (link.getText() == null)) { String displayed = link.getDisplayedText(); if ((displayed != null) && (displayed.length() > 0) && (Character.isLowerCase(displayed.charAt(0)))) { if (translated.length() > 1) { translated = translated.substring(0, 1).toLowerCase() + translated.substring(1); } else { translated = translated.toLowerCase(); } } } newText.append(translated); if ((translated.indexOf('#') < 0) && (link.getAnchor() != null)) { newText.append("#"); newText.append(link.getAnchor()); } if (!translateText || (link.getText() != null)) { newText.append("|"); newText.append(link.getDisplayedText()); } newText.append("]]"); lastPosition = link.getEndIndex(); } } else { if (useInterLanguage) { if (link.getBeginIndex() > lastPosition) { newText.append(text.substring(lastPosition, link.getBeginIndex())); lastPosition = link.getEndIndex(); } newText.append("[[:"); newText.append(from.getSettings().getLanguage()); newText.append(":"); newText.append(link.getFullLink()); newText.append("|"); newText.append(link.getDisplayedText()); newText.append("]]"); lastPosition = link.getEndIndex(); } } } if (newText.length() == 0) { return text; } if (lastPosition < text.length()) { newText.append(text.substring(lastPosition)); lastPosition = text.length(); } return newText.toString(); } /** * @param text Text to translate. * @param translate Flag indicating if categories should be translated. * @return Text with categories translated. * @throws APIException */ private String translateCategories( String text, boolean translate) throws APIException { if (!translate) { return text; } Namespace categoryNamespace = getWikipedia().getWikiConfiguration().getNamespace(Namespace.CATEGORY); if (categoryNamespace == null) { return text; } PageAnalysis analysis = page.getAnalysis(text, true); Collection<PageElementCategory> categories = analysis.getCategories(); Map<String, String> interwikis = new HashMap<String, String>(); StringBuilder newText = new StringBuilder(); int lastPosition = 0; for (PageElementCategory category : categories) { String categoryName = category.getName(); String fullCategoryName = categoryNamespace.getCanonicalTitle() + ":" + categoryName; setText(GT._("Retrieving interwiki for {0}", fullCategoryName)); String translated = null; if (!interwikis.containsKey(categoryName)) { translated = getLanguageLink(fullCategoryName); interwikis.put(categoryName, translated); } else { translated = interwikis.get(categoryName); } if ((translated != null) && !Page.areSameTitle(categoryName, translated)) { if (category.getBeginIndex() > lastPosition) { newText.append(text.substring(lastPosition, category.getBeginIndex())); lastPosition = category.getBeginIndex(); } newText.append("[["); newText.append(translated); if (category.getSort() != null) { newText.append("|"); newText.append(category.getSort()); } newText.append("]]"); lastPosition = category.getEndIndex(); } } if (newText.length() == 0) { return text; } if (lastPosition < text.length()) { newText.append(text.substring(lastPosition)); lastPosition = text.length(); } return newText.toString(); } /** * @param text Text to translate. * @param translateName Flag indicating if templates names should be translated. * @param translateWithoutParams Flag indicating if templates without parameters should be translated. * @return Text with templates translated. * @throws APIException */ private String translateTemplates( String text, boolean translateName, boolean translateWithoutParams) throws APIException { if (!translateName) { return text; } Namespace templateNamespace = getWikipedia().getWikiConfiguration().getNamespace(Namespace.TEMPLATE); if (templateNamespace == null) { return text; } PageAnalysis analysis = page.getAnalysis(text, true); Collection<PageElementTemplate> templates = analysis.getTemplates(); Map<String, String> interwikis = new HashMap<String, String>(); StringBuilder newText = new StringBuilder(); int lastPosition = 0; for (PageElementTemplate template : templates) { String templateName = template.getTemplateName(); String fullTemplateName = templateNamespace.getCanonicalTitle() + ":" + templateName; setText(GT._("Retrieving interwiki for {0}", fullTemplateName)); String translated = null; if (!interwikis.containsKey(templateName)) { translated = getLanguageLink(fullTemplateName); interwikis.put(templateName, translated); } else { translated = interwikis.get(templateName); } if ((translated != null) && !Page.areSameTitle(templateName, translated)) { if (template.getBeginIndex() > lastPosition) { newText.append(text.substring(lastPosition, template.getBeginIndex())); lastPosition = template.getBeginIndex(); } if (translateWithoutParams && (template.getParameterCount() == 0)) { newText.append("{{"); int columnPos = translated.indexOf(':'); if (columnPos < 0) { newText.append(translated); } else { newText.append(translated.substring(columnPos + 1)); } newText.append("}}"); lastPosition = template.getEndIndex(); } else { newText.append("<!-- "); newText.append(translated); newText.append(" -->"); } } } if (newText.length() == 0) { return text; } if (lastPosition < text.length()) { newText.append(text.substring(lastPosition)); lastPosition = text.length(); } return newText.toString(); } /** * @param pageName Page name. * @return Language link. * @throws APIException */ private String getLanguageLink(String pageName) throws APIException { API api = APIFactory.getAPI(); String link = api.getLanguageLink(from, getWikipedia(), pageName); if (link != null) { return link; } Page original = DataManager.getPage(from, pageName, null, null, null); //api.retrieveLinksWithRedirects(from, original, null, null); api.initializeRedirect(from, Collections.singletonList(original)); if (!original.isRedirect()) { return link; } api.retrieveContents(from, Collections.singletonList(original), false, true); link = api.getLanguageLink(from, getWikipedia(), original.getRedirectTitle()); if (link == null) { return null; } String destination = original.getRedirectDestination(); int anchorPos = destination.indexOf('#'); if (anchorPos < 0) { return link; } return link + destination.substring(anchorPos); } }