/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.check.algorithm; import java.util.Collection; import org.wikipediacleaner.api.check.CheckErrorResult; import org.wikipediacleaner.api.check.HtmlCharacters; import org.wikipediacleaner.api.check.CheckErrorResult.ErrorLevel; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.gui.swing.component.MWPane; import org.wikipediacleaner.i18n.GT; /** * Algorithm for analyzing error 27 of check wikipedia project. * Error 27: Unicode syntax */ public class CheckErrorAlgorithm027 extends CheckErrorAlgorithmBase { /** * Possible global fixes. */ private final static String[] globalFixes = new String[] { GT._("Replace all"), }; public CheckErrorAlgorithm027() { super("Unicode syntax"); } /** * Analyze a page to check if errors are present. * * @param analysis Page analysis. * @param errors Errors found in the page. * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. * @return Flag indicating if the error was found. */ @Override public boolean analyze( PageAnalysis analysis, Collection<CheckErrorResult> errors, boolean onlyAutomatic) { if (analysis == null) { return false; } // Analyzing the text from the beginning boolean result = false; String contents = analysis.getContents(); int ampersandIndex = contents.indexOf('&'); int maxLength = contents.length(); while ((ampersandIndex >= 0) && (ampersandIndex < maxLength)) { // TODO : Check if we should look for a match a this position int tmpIndex = ampersandIndex + 1; if ((tmpIndex < maxLength) && (contents.charAt(tmpIndex) == '#')) { tmpIndex++; } int radix = 10; if ((tmpIndex < maxLength) && (contents.charAt(tmpIndex) == 'x')) { radix = 16; tmpIndex++; } int startIndex = tmpIndex; while ((tmpIndex < maxLength) && (Character.digit(contents.charAt(tmpIndex), radix) >= 0)) { tmpIndex++; } if ((tmpIndex > startIndex) && (tmpIndex < maxLength) && (contents.charAt(tmpIndex) == ';')) { int entityNumber = Integer.parseInt(contents.substring(startIndex, tmpIndex), radix); HtmlCharacters htmlCharacter = HtmlCharacters.getCharacterByEntityNumber(entityNumber); boolean shouldReplace = true; if (htmlCharacter != null) { shouldReplace = htmlCharacter.shouldReplaceNumeric(); if (HtmlCharacters.SYMBOL_VERTICAL_BAR.equals(htmlCharacter) && (analysis.isInTemplate(ampersandIndex) != null)) { shouldReplace = false; } } if (shouldReplace) { if (errors == null) { return true; } result = true; CheckErrorResult errorResult = createCheckErrorResult( analysis, ampersandIndex, tmpIndex + 1, htmlCharacter != null ? ErrorLevel.ERROR : ErrorLevel.WARNING); if (htmlCharacter != null) { errorResult.addReplacement("" + htmlCharacter.getValue(), true); } else { errorResult.addReplacement("" + (char) entityNumber, false); } errors.add(errorResult); } } ampersandIndex = contents.indexOf('&', ampersandIndex + 1); } return result; } /** * Bot fixing of all the errors in the page. * * @param analysis Page analysis. * @return Page contents after fix. */ @Override protected String internalBotFix(PageAnalysis analysis) { return fixUsingAutomaticReplacement(analysis); } /** * @return List of possible global fixes. */ @Override public String[] getGlobalFixes() { return globalFixes; } /** * Fix all the errors in the page. * * @param fixName Fix name (extracted from getGlobalFixes()). * @param analysis Page analysis. * @param textPane Text pane. * @return Page contents after fix. */ @Override public String fix(String fixName, PageAnalysis analysis, MWPane textPane) { return fixUsingFirstReplacement(fixName, analysis); } }