/*
 *  WPCleaner: A tool to help on Wikipedia maintenance tasks.
 *  Copyright (C) 2013  Nicolas Vervelle
 *
 *  See README.txt file for licensing information.
 */

package org.wikipediacleaner.api.check.algorithm;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.check.HtmlCharacters;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTemplate;


/**
 * Algorithm for analyzing error 11 of check wikipedia project.
 * Error 11: HTML named entities
 * <p>
 * The constructor registers the {@link HtmlCharacters} values handled by this
 * error; the detection itself is delegated to the parent class. This class only
 * adds a pre-check in {@link #analyze(PageAnalysis, Collection, boolean)} that
 * suppresses the error on pages containing math content.
 * <p>
 * Formatting note: every registration is followed by an end-of-line comment
 * showing the character, so each registration MUST stay on its own physical
 * line. Joining lines would make the comment swallow the statements after it.
 */
public class CheckErrorAlgorithm011 extends CheckErrorAlgorithmHtmlNamedEntities {

  /**
   * List of HTML characters managed by this error.
   */
  private final List<HtmlCharacters> htmlCharacters;

  /**
   * Builds the algorithm and registers the HTML characters it manages.
   * <p>
   * Entries that are commented out are not registered and are therefore not
   * managed by this error. NOTE(review): they look like deliberate exclusions
   * kept for reference (several are Greek capitals that resemble Latin letters,
   * dashes, primes, ...), but the reason is not stated here; confirm before
   * enabling or deleting any of them.
   */
  public CheckErrorAlgorithm011() {
    super("HTML named entities");
    htmlCharacters = new ArrayList<HtmlCharacters>();
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_ACUTE_ACCENT); // á
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_ACUTE_ACCENT); // Á
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_CIRCUMFLEX_ACCENT); // â
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_CIRCUMFLEX_ACCENT); // Â
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_AE); // æ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_AE); // Æ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_GRAVE_ACCENT); // à
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_GRAVE_ACCENT); // À
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_ALPHA); // α
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_ALPHA); // Α
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_RING); // å
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_RING); // Å
    htmlCharacters.add(HtmlCharacters.SYMBOL_ALMOST_EQUAL); // ≈
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_TILDE); // ã
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_TILDE); // Ã
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_A_UMLAUT_MARK); // ä
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_A_UMLAUT_MARK); // Ä
    htmlCharacters.add(HtmlCharacters.SYMBOL_DOUBLE_LOW_9_QUOTATION_MARK); // „
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_BETA); // β
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_BETA); // Β
    htmlCharacters.add(HtmlCharacters.SYMBOL_BROKEN_VERTICAL_BAR); // ¦
    htmlCharacters.add(HtmlCharacters.SYMBOL_BULLET); // •
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_C_CEDILLA); // ç
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_C_CEDILLA); // Ç
    htmlCharacters.add(HtmlCharacters.SYMBOL_CENT); // ¢
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_CHI); // χ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_CHI); // Χ
    htmlCharacters.add(HtmlCharacters.SYMBOL_CLUB); // ♣
    htmlCharacters.add(HtmlCharacters.SYMBOL_COPYRIGHT); // ©
    htmlCharacters.add(HtmlCharacters.SYMBOL_CARRIAGE_RETURN_ARROW); // ↵
    // htmlCharacters.add(HtmlCharacters.SYMBOL_DAGGER); // †
    htmlCharacters.add(HtmlCharacters.SYMBOL_DOUBLE_DAGGER); // ‡
    htmlCharacters.add(HtmlCharacters.SYMBOL_DOWN_ARROW); // ↓
    htmlCharacters.add(HtmlCharacters.SYMBOL_DOWN_DOUBLE_ARROW); // ⇓
    htmlCharacters.add(HtmlCharacters.SYMBOL_DEGREE); // °
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_DELTA); // δ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_DELTA); // Δ
    htmlCharacters.add(HtmlCharacters.SYMBOL_DIAMOND); // ♦
    htmlCharacters.add(HtmlCharacters.SYMBOL_DIVISION); // ÷
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_E_ACUTE_ACCENT); // é
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_E_ACUTE_ACCENT); // É
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_E_CIRCUMFLEX_ACCENT); // ê
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_E_CIRCUMFLEX_ACCENT); // Ê
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_E_GRAVE_ACCENT); // è
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_E_GRAVE_ACCENT); // È
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_EPSILON); // ε
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_EPSILON); // Ε
    htmlCharacters.add(HtmlCharacters.SYMBOL_EQUIVALENT); // ≡
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_ETA); // η
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_ETA); // Η
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_ETH); // ð
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_ETH); // Ð
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_E_UMLAUT_MARK); // ë
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_E_UMLAUT_MARK); // Ë
    htmlCharacters.add(HtmlCharacters.SYMBOL_EURO); // €
    htmlCharacters.add(HtmlCharacters.LETTER_F_WITH_HOOK); // ƒ
    htmlCharacters.add(HtmlCharacters.SYMBOL_FRACTION_1_2); // ½
    htmlCharacters.add(HtmlCharacters.SYMBOL_FRACTION_1_4); // ¼
    htmlCharacters.add(HtmlCharacters.SYMBOL_FRACTION_3_4); // ¾
    htmlCharacters.add(HtmlCharacters.SYMBOL_FRACTION_SLASH); // ⁄
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_GAMMA); // γ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_GAMMA); // Γ
    htmlCharacters.add(HtmlCharacters.SYMBOL_GREATER_OR_EQUAL); // ≥
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_RIGHT_ARROW); // ↔
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_RIGHT_DOUBLE_ARROW); // ⇔
    htmlCharacters.add(HtmlCharacters.SYMBOL_HEART); // ♥
    htmlCharacters.add(HtmlCharacters.SYMBOL_HORIZONTAL_ELLIPSIS); // …
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_I_ACUTE_ACCENT); // í
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_I_ACUTE_ACCENT); // Í
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_I_CIRCUMFLEX_ACCENT); // î
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_I_CIRCUMFLEX_ACCENT); // Î
    htmlCharacters.add(HtmlCharacters.SYMBOL_INVERTED_EXCLAMATION_MARK); // ¡
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_I_GRAVE_ACCENT); // ì
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_I_GRAVE_ACCENT); // Ì
    htmlCharacters.add(HtmlCharacters.SYMBOL_INFINITY); // ∞
    htmlCharacters.add(HtmlCharacters.SYMBOL_INTEGRAL); // ∫
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_IOTA); // ι
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_IOTA); // Ι
    htmlCharacters.add(HtmlCharacters.SYMBOL_INVERTED_QUESTION_MARK); // ¿
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_I_UMLAUT_MARK); // ï
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_I_UMLAUT_MARK); // Ï
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_KAPPA); // κ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_KAPPA); // Κ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_LAMBDA); // λ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_LAMBDA); // Λ
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_ANGLE_QUOTATION_MARK); // «
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_ARROW); // ←
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_DOUBLE_ARROW); // ⇐
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_DOUBLE_QUOTATION_MARK); // “
    htmlCharacters.add(HtmlCharacters.SYMBOL_LESS_OR_EQUAL); // ≤
    htmlCharacters.add(HtmlCharacters.SYMBOL_LOZENGE); // ◊
    htmlCharacters.add(HtmlCharacters.SYMBOL_SINGLE_LEFT_ANGLE_QUOTATION); // ‹
    htmlCharacters.add(HtmlCharacters.SYMBOL_LEFT_SINGLE_QUOTATION_MARK); // ‘
    // htmlCharacters.add(HtmlCharacters.SYMBOL_EM_DASH); // —
    htmlCharacters.add(HtmlCharacters.SYMBOL_MICRO); // µ
    htmlCharacters.add(HtmlCharacters.SYMBOL_MIDDLE_DOT); // ·
    // htmlCharacters.add(HtmlCharacters.SYMBOL_MINUS); // −
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_MU); // μ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_MU); // Μ
    // htmlCharacters.add(HtmlCharacters.SYMBOL_EN_DASH); // –
    htmlCharacters.add(HtmlCharacters.SYMBOL_NOT_EQUAL); // ≠
    htmlCharacters.add(HtmlCharacters.SYMBOL_NEGATION); // ¬
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_N_TILDE); // ñ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_N_TILDE); // Ñ
    // htmlCharacters.add(HtmlCharacters.LETTER_SMALL_NU); // ν
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_NU); // Ν
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_ACUTE_ACCENT); // ó
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_ACUTE_ACCENT); // Ó
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_CIRCUMFLEX_ACCENT); // ô
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_CIRCUMFLEX_ACCENT); // Ô
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_OE); // œ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_OE); // Œ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_GRAVE_ACCENT); // ò
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_GRAVE_ACCENT); // Ò
    htmlCharacters.add(HtmlCharacters.SYMBOL_OVERLINE); // ‾
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_OMEGA); // ω
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_OMEGA); // Ω
    // htmlCharacters.add(HtmlCharacters.LETTER_SMALL_OMICRON); // ο
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_OMICRON); // Ο
    htmlCharacters.add(HtmlCharacters.SYMBOL_FEMININE_ORDINAL_INDICATOR); // ª
    htmlCharacters.add(HtmlCharacters.SYMBOL_MASCULINE_ORDINAL_INDICATOR); // º
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_SLASH); // ø
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_SLASH); // Ø
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_TILDE); // õ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_TILDE); // Õ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_O_UMLAUT_MARK); // ö
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_O_UMLAUT_MARK); // Ö
    htmlCharacters.add(HtmlCharacters.SYMBOL_PARAGRAPH); // ¶
    htmlCharacters.add(HtmlCharacters.SYMBOL_PART); // ∂
    htmlCharacters.add(HtmlCharacters.SYMBOL_PER_MILLE); // ‰
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_PHI); // φ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_PHI); // Φ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_PI); // π
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_PI); // Π
    htmlCharacters.add(HtmlCharacters.LETTER_PI_SYMBOL); // ϖ
    htmlCharacters.add(HtmlCharacters.SYMBOL_PLUS_OR_MINUS); // ±
    htmlCharacters.add(HtmlCharacters.SYMBOL_POUND); // £
    // htmlCharacters.add(HtmlCharacters.SYMBOL_MINUTES); // ′
    // htmlCharacters.add(HtmlCharacters.SYMBOL_SECONDS); // ″
    htmlCharacters.add(HtmlCharacters.SYMBOL_PROD); // ∏
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_PSI); // ψ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_PSI); // Ψ
    htmlCharacters.add(HtmlCharacters.SYMBOL_QUOTATION_MARK); // "
    htmlCharacters.add(HtmlCharacters.SYMBOL_SQUARE_ROOT); // √
    htmlCharacters.add(HtmlCharacters.SYMBOL_RIGHT_ANGLE_QUOTATION_MARK); // »
    htmlCharacters.add(HtmlCharacters.SYMBOL_RIGHT_ARROW); // →
    htmlCharacters.add(HtmlCharacters.SYMBOL_RIGHT_DOUBLE_ARROW); // ⇒
    htmlCharacters.add(HtmlCharacters.SYMBOL_RIGHT_DOUBLE_QUOTATION_MARK); // ”
    htmlCharacters.add(HtmlCharacters.SYMBOL_REGISTERED_TRADEMARK); // ®
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_RHO); // ρ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_RHO); // Ρ
    htmlCharacters.add(HtmlCharacters.SYMBOL_SINGLE_RIGHT_ANGLE_QUOTATION); // ›
    htmlCharacters.add(HtmlCharacters.SYMBOL_RIGHT_SINGLE_QUOTATION_MARK); // ’
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_S_CARON); // š
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_S_CARON); // Š
    htmlCharacters.add(HtmlCharacters.SYMBOL_SECTION); // §
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_SIGMA); // σ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_SIGMA); // Σ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_SIGMAF); // ς
    htmlCharacters.add(HtmlCharacters.SYMBOL_SPADE); // ♠
    htmlCharacters.add(HtmlCharacters.SYMBOL_SUM); // ∑
    htmlCharacters.add(HtmlCharacters.SYMBOL_SUPERSCRIPT_1); // ¹
    htmlCharacters.add(HtmlCharacters.SYMBOL_SUPERSCRIPT_2); // ²
    htmlCharacters.add(HtmlCharacters.SYMBOL_SUPERSCRIPT_3); // ³
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_SHARP_S); // ß
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_TAU); // τ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_TAU); // Τ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_THETA); // θ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_THETA); // Θ
    htmlCharacters.add(HtmlCharacters.LETTER_THETA_SYMBOL); // ϑ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_THORN); // þ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_THORN); // Þ
    htmlCharacters.add(HtmlCharacters.SYMBOL_SMALL_TILDE); // ˜
    // htmlCharacters.add(HtmlCharacters.SYMBOL_MULTIPLICATION); // ×
    htmlCharacters.add(HtmlCharacters.SYMBOL_TRADEMARK); // ™
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_U_ACUTE_ACCENT); // ú
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_U_ACUTE_ACCENT); // Ú
    htmlCharacters.add(HtmlCharacters.SYMBOL_UP_ARROW); // ↑
    htmlCharacters.add(HtmlCharacters.SYMBOL_UP_DOUBLE_ARROW); // ⇑
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_U_CIRCUMFLEX_ACCENT); // û
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_U_CIRCUMFLEX_ACCENT); // Û
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_U_GRAVE_ACCENT); // ù
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_U_GRAVE_ACCENT); // Ù
    htmlCharacters.add(HtmlCharacters.LETTER_UPSILON_SYMBOL); // ϒ
    // htmlCharacters.add(HtmlCharacters.LETTER_SMALL_UPSILON); // υ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_UPSILON); // Υ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_U_UMLAUT_MARK); // ü
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_U_UMLAUT_MARK); // Ü
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_XI); // ξ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_XI); // Ξ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_Y_ACUTE_ACCENT); // ý
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_Y_ACUTE_ACCENT); // Ý
    htmlCharacters.add(HtmlCharacters.SYMBOL_YEN); // ¥
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_Y_UMLAUT_MARK); // ÿ
    htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_Y_UMLAUT_MARK); // Ÿ
    htmlCharacters.add(HtmlCharacters.LETTER_SMALL_ZETA); // ζ
    // htmlCharacters.add(HtmlCharacters.LETTER_CAPITAL_ZETA); // Ζ
  }

  /**
   * @return List of HTML characters managed by this error.
   *         This is the internal list built by the constructor, not a copy.
   */
  @Override
  protected List<HtmlCharacters> getHtmlCharacters() {
    return htmlCharacters;
  }

  /**
   * Analyze a page to check if errors are present.
   * <p>
   * Nothing is reported when the analysis is <code>null</code>, when the page
   * has a math tag that is not enclosed in a nowiki, source or syntaxhighlight
   * tag, or when the page uses a "math" template. Otherwise the analysis is
   * delegated to the parent class.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed.
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // If math tags are present, don't report the error
    if (hasMathTagOutsideVerbatimTags(analysis)) {
      return false;
    }

    // If math templates are present, don't report the error
    List<PageElementTemplate> templates = analysis.getTemplates("math");
    if ((templates != null) && (!templates.isEmpty())) {
      return false;
    }

    return super.analyze(analysis, errors, onlyAutomatic);
  }

  /**
   * Tells whether the page has a math tag whose begin index is not enclosed
   * in a nowiki, source or syntaxhighlight tag.
   *
   * @param analysis Page analysis (must not be <code>null</code>).
   * @return True if at least one such math tag exists.
   */
  private boolean hasMathTagOutsideVerbatimTags(PageAnalysis analysis) {
    List<PageElementTag> tags = analysis.getTags(PageElementTag.TAG_WIKI_MATH);
    if (tags == null) {
      return false;
    }
    for (PageElementTag tag : tags) {
      int index = tag.getBeginIndex();
      // A math tag only counts when none of the three enclosing tags surrounds it.
      if ((analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, index) == null) &&
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, index) == null) &&
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, index) == null)) {
        return true;
      }
    }
    return false;
  }
}