/*
 *  WPCleaner: A tool to help on Wikipedia maintenance tasks.
 *  Copyright (C) 2013  Nicolas Vervelle
 *
 *  See README.txt file for licensing information.
 */

package org.wikipediacleaner.api.check.algorithm;

import java.util.Collection;

import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementComment;
import org.wikipediacleaner.api.data.PageElementTag;


/**
 * Algorithm for analyzing error 56 of check wikipedia project.
 * Error 56: Arrow as ASCII art
 */
public class CheckErrorAlgorithm056 extends CheckErrorAlgorithmBase {

  /**
   * Creates the algorithm, passing its description to the base class.
   */
  public CheckErrorAlgorithm056() {
    super("Arrow as ASCII art");
  }

  /**
   * Detected arrows and their replacements.
   *
   * Each row holds the ASCII sequence to detect at index 0, followed by one
   * or more suggested replacements. The table is scanned from the top and
   * the first matching row wins, so sequences sharing a prefix are listed
   * from the longest to the shortest.
   */
  private static final String[][] ARROW_REPLACEMENTS = {
    { "<--->", "↔" },
    { "<-->" , "↔" },
    { "<->"  , "↔" },
    { "<–––>", "↔" },
    { "<––>" , "↔" },
    { "<–>"  , "↔" },
    { "<———>", "↔" },
    { "<——>" , "↔" },
    { "<—>"  , "↔" },
    { "<---" , "←" },
    { "<--"  , "←" },
    { "<-"   , "←" },
    { "<–––" , "←" },
    { "<––"  , "←" },
    { "<–"   , "←" },
    { "<———" , "←" },
    { "<——"  , "←" },
    { "<—"   , "←" },
    { "<===>", "⇔" },
    { "<==>" , "⇔" },
    { "<=>"  , "⇔" },
    { "<===" , "⇐" },
    { "<=="  , "⇐" },
    { "<="   , "⇐", "≤" },
    { "--->", "→" },
    { "-->" , "→" },
    { "->"  , "→" },
    { "–––>", "→" },
    { "––>" , "→" },
    { "–>"  , "→" },
    { "———>", "→" },
    { "——>" , "→" },
    { "—>"  , "→" },
    { "===>", "⇒" },
    { "==>" , "⇒" },
    { "=>"  , "⇒", "≥" },
  };

  /**
   * Names of the tags inside which an arrow is never reported.
   */
  private static final String[] EXCLUDED_TAGS = {
    PageElementTag.TAG_HTML_TT,
    PageElementTag.TAG_HTML_CODE,
    PageElementTag.TAG_WIKI_HIERO,
    PageElementTag.TAG_WIKI_MATH,
    PageElementTag.TAG_WIKI_MATH_CHEM,
    PageElementTag.TAG_WIKI_NOWIKI,
    PageElementTag.TAG_WIKI_PRE,
    PageElementTag.TAG_WIKI_SCORE,
    PageElementTag.TAG_WIKI_SOURCE,
    PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT,
    PageElementTag.TAG_WIKI_TIMELINE,
  };

  /**
   * Scans the page contents for ASCII art arrows and reports each one that
   * is neither inside a comment nor inside one of the excluded tags.
   *
   * @param analysis Page analysis; when null nothing is analyzed.
   * @param errors Collection receiving the errors found; when null the
   *               method returns as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors
   *                      automatically fixed (not consulted by this
   *                      implementation).
   * @return True if at least one error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    boolean found = false;
    String text = analysis.getContents();
    int position = 0;
    while (position < text.length()) {

      // Nothing that looks like an arrow here: move on by one character
      String[] arrow = arrowStartingAt(text, position);
      if (arrow == null) {
        position++;
        continue;
      }

      // Arrows inside a comment are ignored, resume after the comment
      PageElementComment comment = analysis.isInComment(position);
      if (comment != null) {
        position = comment.getEndIndex();
        continue;
      }

      // Arrows inside an excluded tag are ignored, resume after the tag
      PageElementTag excluded = excludedTagAround(analysis, position);
      if (excluded != null) {
        position = excluded.getCompleteEndIndex();
        continue;
      }

      // A real arrow: without a collection to fill, one hit is enough
      if (errors == null) {
        return true;
      }
      found = true;

      int arrowEnd = position + arrow[0].length();
      boolean handledAsAttribute =
          "=>".equals(arrow[0]) &&
          reportEmptyAttributeValue(analysis, errors, text, position, arrowEnd);
      if (!handledAsAttribute) {
        CheckErrorResult error = createCheckErrorResult(analysis, position, arrowEnd);
        for (int column = 1; column < arrow.length; column++) {
          error.addReplacement(arrow[column]);
        }
        errors.add(error);
      }
      position = arrowEnd;
    }

    return found;
  }

  /**
   * Looks for a known arrow beginning at a given position.
   *
   * @param text Text to search in.
   * @param position Index in the text where the arrow should begin.
   * @return First row of the arrow table whose sequence begins at the
   *         position, or null if there is none.
   */
  private static String[] arrowStartingAt(String text, int position) {
    for (String[] candidate : ARROW_REPLACEMENTS) {
      if (text.startsWith(candidate[0], position)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Looks for an excluded tag surrounding a given position.
   *
   * @param analysis Page analysis.
   * @param position Index in the page contents.
   * @return First surrounding tag found, trying the excluded tag names in
   *         their declaration order, or null if there is none.
   */
  private static PageElementTag excludedTagAround(PageAnalysis analysis, int position) {
    for (String tagName : EXCLUDED_TAGS) {
      PageElementTag surrounding = analysis.getSurroundingTag(tagName, position);
      if (surrounding != null) {
        return surrounding;
      }
    }
    return null;
  }

  /**
   * Handles a "=>" which is in fact an attribute without value followed by
   * the end of its tag, as in {@code <ref name=>}.
   *
   * When the "=>" ends a tag and is directly preceded by a run of letters
   * itself preceded by a space, an error covering the space, the attribute
   * name and the "=>" is reported, with ">" as replacement. The replacement
   * is flagged as automatic only for the attribute "name" of a ref tag.
   *
   * @param analysis Page analysis.
   * @param errors Collection receiving the error.
   * @param text Page contents.
   * @param position Index of the "=>" in the page contents.
   * @param arrowEnd Index just after the "=>".
   * @return True if the error has been reported by this method.
   */
  private boolean reportEmptyAttributeValue(
      PageAnalysis analysis, Collection<CheckErrorResult> errors,
      String text, int position, int arrowEnd) {

    // The ">" of the arrow has to be the one closing the tag
    PageElementTag tag = analysis.isInTag(position);
    if ((tag == null) || (tag.getEndIndex() != position + 2)) {
      return false;
    }

    // Walk back over the letters preceding the "="
    int beforeName = position - 1;
    while ((beforeName > 0) && Character.isLetter(text.charAt(beforeName))) {
      beforeName--;
    }

    // A non empty name preceded by a space is required
    if ((beforeName <= 0) ||
        (position <= beforeName + 1) ||
        (text.charAt(beforeName) != ' ')) {
      return false;
    }

    String attributeName = text.substring(beforeName + 1, position);
    boolean automatic =
        PageElementTag.TAG_WIKI_REF.equals(tag.getName()) &&
        attributeName.equals("name");
    CheckErrorResult error = createCheckErrorResult(analysis, beforeName, arrowEnd);
    error.addReplacement(">", automatic);
    errors.add(error);
    return true;
  }

  /**
   * Automatic fixing of all the errors in the page, delegated to
   * fixUsingAutomaticReplacement.
   *
   * @param analysis Page analysis.
   * @return Page contents after fix.
   */
  @Override
  protected String internalAutomaticFix(PageAnalysis analysis) {
    return fixUsingAutomaticReplacement(analysis);
  }
}