/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.check.HtmlCharacters;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTemplate;
/**
 * Algorithm for analyzing error 11 of check wikipedia project.
 * Error 11: HTML named entities
 * <p>
 * The constructor registers the HTML characters handled by this error, and
 * {@link #analyze(PageAnalysis, Collection, boolean)} skips pages that contain
 * math markup before delegating to the parent class.
 */
public class CheckErrorAlgorithm011 extends CheckErrorAlgorithmHtmlNamedEntities {

  /**
   * HTML characters managed by this error, in registration order.
   */
  private final List<HtmlCharacters> htmlCharacters;

  /**
   * Creates the algorithm and registers the HTML characters it manages.
   * Entries that are commented out in the table below are not registered.
   */
  public CheckErrorAlgorithm011() {
    super("HTML named entities");

    // Ordered table of managed characters; the list is filled in this exact order.
    HtmlCharacters[] managed = {
      HtmlCharacters.LETTER_SMALL_A_ACUTE_ACCENT, // á
      HtmlCharacters.LETTER_CAPITAL_A_ACUTE_ACCENT, // Á
      HtmlCharacters.LETTER_SMALL_A_CIRCUMFLEX_ACCENT, // â
      HtmlCharacters.LETTER_CAPITAL_A_CIRCUMFLEX_ACCENT, // Â
      HtmlCharacters.LETTER_SMALL_AE, // æ
      HtmlCharacters.LETTER_CAPITAL_AE, // Æ
      HtmlCharacters.LETTER_SMALL_A_GRAVE_ACCENT, // à
      HtmlCharacters.LETTER_CAPITAL_A_GRAVE_ACCENT, // À
      HtmlCharacters.LETTER_SMALL_ALPHA, // α
      // HtmlCharacters.LETTER_CAPITAL_ALPHA, // Α
      HtmlCharacters.LETTER_SMALL_A_RING, // å
      HtmlCharacters.LETTER_CAPITAL_A_RING, // Å
      HtmlCharacters.SYMBOL_ALMOST_EQUAL, // ≈
      HtmlCharacters.LETTER_SMALL_A_TILDE, // ã
      HtmlCharacters.LETTER_CAPITAL_A_TILDE, // Ã
      HtmlCharacters.LETTER_SMALL_A_UMLAUT_MARK, // ä
      HtmlCharacters.LETTER_CAPITAL_A_UMLAUT_MARK, // Ä
      HtmlCharacters.SYMBOL_DOUBLE_LOW_9_QUOTATION_MARK, // „
      HtmlCharacters.LETTER_SMALL_BETA, // β
      // HtmlCharacters.LETTER_CAPITAL_BETA, // Β
      HtmlCharacters.SYMBOL_BROKEN_VERTICAL_BAR, // ¦
      HtmlCharacters.SYMBOL_BULLET, // •
      HtmlCharacters.LETTER_SMALL_C_CEDILLA, // ç
      HtmlCharacters.LETTER_CAPITAL_C_CEDILLA, // Ç
      HtmlCharacters.SYMBOL_CENT, // ¢
      HtmlCharacters.LETTER_SMALL_CHI, // χ
      // HtmlCharacters.LETTER_CAPITAL_CHI, // Χ
      HtmlCharacters.SYMBOL_CLUB, // ♣
      HtmlCharacters.SYMBOL_COPYRIGHT, // ©
      HtmlCharacters.SYMBOL_CARRIAGE_RETURN_ARROW, // ↵
      // HtmlCharacters.SYMBOL_DAGGER, // †
      HtmlCharacters.SYMBOL_DOUBLE_DAGGER, // ‡
      HtmlCharacters.SYMBOL_DOWN_ARROW, // ↓
      HtmlCharacters.SYMBOL_DOWN_DOUBLE_ARROW, // ⇓
      HtmlCharacters.SYMBOL_DEGREE, // °
      HtmlCharacters.LETTER_SMALL_DELTA, // δ
      HtmlCharacters.LETTER_CAPITAL_DELTA, // Δ
      HtmlCharacters.SYMBOL_DIAMOND, // ♦
      HtmlCharacters.SYMBOL_DIVISION, // ÷
      HtmlCharacters.LETTER_SMALL_E_ACUTE_ACCENT, // é
      HtmlCharacters.LETTER_CAPITAL_E_ACUTE_ACCENT, // É
      HtmlCharacters.LETTER_SMALL_E_CIRCUMFLEX_ACCENT, // ê
      HtmlCharacters.LETTER_CAPITAL_E_CIRCUMFLEX_ACCENT, // Ê
      HtmlCharacters.LETTER_SMALL_E_GRAVE_ACCENT, // è
      HtmlCharacters.LETTER_CAPITAL_E_GRAVE_ACCENT, // È
      HtmlCharacters.LETTER_SMALL_EPSILON, // ε
      // HtmlCharacters.LETTER_CAPITAL_EPSILON, // Ε
      HtmlCharacters.SYMBOL_EQUIVALENT, // ≡
      HtmlCharacters.LETTER_SMALL_ETA, // η
      // HtmlCharacters.LETTER_CAPITAL_ETA, // Η
      HtmlCharacters.LETTER_SMALL_ETH, // ð
      HtmlCharacters.LETTER_CAPITAL_ETH, // Ð
      HtmlCharacters.LETTER_SMALL_E_UMLAUT_MARK, // ë
      HtmlCharacters.LETTER_CAPITAL_E_UMLAUT_MARK, // Ë
      HtmlCharacters.SYMBOL_EURO, // €
      HtmlCharacters.LETTER_F_WITH_HOOK, // ƒ
      HtmlCharacters.SYMBOL_FRACTION_1_2, // ½
      HtmlCharacters.SYMBOL_FRACTION_1_4, // ¼
      HtmlCharacters.SYMBOL_FRACTION_3_4, // ¾
      HtmlCharacters.SYMBOL_FRACTION_SLASH, // ⁄
      HtmlCharacters.LETTER_SMALL_GAMMA, // γ
      HtmlCharacters.LETTER_CAPITAL_GAMMA, // Γ
      HtmlCharacters.SYMBOL_GREATER_OR_EQUAL, // ≥
      HtmlCharacters.SYMBOL_LEFT_RIGHT_ARROW, // ↔
      HtmlCharacters.SYMBOL_LEFT_RIGHT_DOUBLE_ARROW, // ⇔
      HtmlCharacters.SYMBOL_HEART, // ♥
      HtmlCharacters.SYMBOL_HORIZONTAL_ELLIPSIS, // …
      HtmlCharacters.LETTER_SMALL_I_ACUTE_ACCENT, // í
      HtmlCharacters.LETTER_CAPITAL_I_ACUTE_ACCENT, // Í
      HtmlCharacters.LETTER_SMALL_I_CIRCUMFLEX_ACCENT, // î
      HtmlCharacters.LETTER_CAPITAL_I_CIRCUMFLEX_ACCENT, // Î
      HtmlCharacters.SYMBOL_INVERTED_EXCLAMATION_MARK, // ¡
      HtmlCharacters.LETTER_SMALL_I_GRAVE_ACCENT, // ì
      HtmlCharacters.LETTER_CAPITAL_I_GRAVE_ACCENT, // Ì
      HtmlCharacters.SYMBOL_INFINITY, // ∞
      HtmlCharacters.SYMBOL_INTEGRAL, // ∫
      HtmlCharacters.LETTER_SMALL_IOTA, // ι
      HtmlCharacters.LETTER_CAPITAL_IOTA, // Ι
      HtmlCharacters.SYMBOL_INVERTED_QUESTION_MARK, // ¿
      HtmlCharacters.LETTER_SMALL_I_UMLAUT_MARK, // ï
      HtmlCharacters.LETTER_CAPITAL_I_UMLAUT_MARK, // Ï
      HtmlCharacters.LETTER_SMALL_KAPPA, // κ
      // HtmlCharacters.LETTER_CAPITAL_KAPPA, // Κ
      HtmlCharacters.LETTER_SMALL_LAMBDA, // λ
      HtmlCharacters.LETTER_CAPITAL_LAMBDA, // Λ
      HtmlCharacters.SYMBOL_LEFT_ANGLE_QUOTATION_MARK, // «
      HtmlCharacters.SYMBOL_LEFT_ARROW, // ←
      HtmlCharacters.SYMBOL_LEFT_DOUBLE_ARROW, // ⇐
      HtmlCharacters.SYMBOL_LEFT_DOUBLE_QUOTATION_MARK, // “
      HtmlCharacters.SYMBOL_LESS_OR_EQUAL, // ≤
      HtmlCharacters.SYMBOL_LOZENGE, // ◊
      HtmlCharacters.SYMBOL_SINGLE_LEFT_ANGLE_QUOTATION, // ‹
      HtmlCharacters.SYMBOL_LEFT_SINGLE_QUOTATION_MARK, // ‘
      // HtmlCharacters.SYMBOL_EM_DASH, // —
      HtmlCharacters.SYMBOL_MICRO, // µ
      HtmlCharacters.SYMBOL_MIDDLE_DOT, // ·
      // HtmlCharacters.SYMBOL_MINUS, // −
      HtmlCharacters.LETTER_SMALL_MU, // μ
      // HtmlCharacters.LETTER_CAPITAL_MU, // Μ
      // HtmlCharacters.SYMBOL_EN_DASH, // –
      HtmlCharacters.SYMBOL_NOT_EQUAL, // ≠
      HtmlCharacters.SYMBOL_NEGATION, // ¬
      HtmlCharacters.LETTER_SMALL_N_TILDE, // ñ
      HtmlCharacters.LETTER_CAPITAL_N_TILDE, // Ñ
      // HtmlCharacters.LETTER_SMALL_NU, // ν
      // HtmlCharacters.LETTER_CAPITAL_NU, // Ν
      HtmlCharacters.LETTER_SMALL_O_ACUTE_ACCENT, // ó
      HtmlCharacters.LETTER_CAPITAL_O_ACUTE_ACCENT, // Ó
      HtmlCharacters.LETTER_SMALL_O_CIRCUMFLEX_ACCENT, // ô
      HtmlCharacters.LETTER_CAPITAL_O_CIRCUMFLEX_ACCENT, // Ô
      HtmlCharacters.LETTER_SMALL_OE, // œ
      HtmlCharacters.LETTER_CAPITAL_OE, // Œ
      HtmlCharacters.LETTER_SMALL_O_GRAVE_ACCENT, // ò
      HtmlCharacters.LETTER_CAPITAL_O_GRAVE_ACCENT, // Ò
      HtmlCharacters.SYMBOL_OVERLINE, // ‾
      HtmlCharacters.LETTER_SMALL_OMEGA, // ω
      HtmlCharacters.LETTER_CAPITAL_OMEGA, // Ω
      // HtmlCharacters.LETTER_SMALL_OMICRON, // ο
      // HtmlCharacters.LETTER_CAPITAL_OMICRON, // Ο
      HtmlCharacters.SYMBOL_FEMININE_ORDINAL_INDICATOR, // ª
      HtmlCharacters.SYMBOL_MASCULINE_ORDINAL_INDICATOR, // º
      HtmlCharacters.LETTER_SMALL_O_SLASH, // ø
      HtmlCharacters.LETTER_CAPITAL_O_SLASH, // Ø
      HtmlCharacters.LETTER_SMALL_O_TILDE, // õ
      HtmlCharacters.LETTER_CAPITAL_O_TILDE, // Õ
      HtmlCharacters.LETTER_SMALL_O_UMLAUT_MARK, // ö
      HtmlCharacters.LETTER_CAPITAL_O_UMLAUT_MARK, // Ö
      HtmlCharacters.SYMBOL_PARAGRAPH, // ¶
      HtmlCharacters.SYMBOL_PART, // ∂
      HtmlCharacters.SYMBOL_PER_MILLE, // ‰
      HtmlCharacters.LETTER_SMALL_PHI, // φ
      HtmlCharacters.LETTER_CAPITAL_PHI, // Φ
      HtmlCharacters.LETTER_SMALL_PI, // π
      HtmlCharacters.LETTER_CAPITAL_PI, // Π
      HtmlCharacters.LETTER_PI_SYMBOL, // ϖ
      HtmlCharacters.SYMBOL_PLUS_OR_MINUS, // ±
      HtmlCharacters.SYMBOL_POUND, // £
      // HtmlCharacters.SYMBOL_MINUTES, // ′
      // HtmlCharacters.SYMBOL_SECONDS, // ″
      HtmlCharacters.SYMBOL_PROD, // ∏
      HtmlCharacters.LETTER_SMALL_PSI, // ψ
      HtmlCharacters.LETTER_CAPITAL_PSI, // Ψ
      HtmlCharacters.SYMBOL_QUOTATION_MARK, // "
      HtmlCharacters.SYMBOL_SQUARE_ROOT, // √
      HtmlCharacters.SYMBOL_RIGHT_ANGLE_QUOTATION_MARK, // »
      HtmlCharacters.SYMBOL_RIGHT_ARROW, // →
      HtmlCharacters.SYMBOL_RIGHT_DOUBLE_ARROW, // ⇒
      HtmlCharacters.SYMBOL_RIGHT_DOUBLE_QUOTATION_MARK, // ”
      HtmlCharacters.SYMBOL_REGISTERED_TRADEMARK, // ®
      HtmlCharacters.LETTER_SMALL_RHO, // ρ
      // HtmlCharacters.LETTER_CAPITAL_RHO, // Ρ
      HtmlCharacters.SYMBOL_SINGLE_RIGHT_ANGLE_QUOTATION, // ›
      HtmlCharacters.SYMBOL_RIGHT_SINGLE_QUOTATION_MARK, // ’
      HtmlCharacters.LETTER_SMALL_S_CARON, // š
      HtmlCharacters.LETTER_CAPITAL_S_CARON, // Š
      HtmlCharacters.SYMBOL_SECTION, // §
      HtmlCharacters.LETTER_SMALL_SIGMA, // σ
      HtmlCharacters.LETTER_CAPITAL_SIGMA, // Σ
      HtmlCharacters.LETTER_SMALL_SIGMAF, // ς
      HtmlCharacters.SYMBOL_SPADE, // ♠
      HtmlCharacters.SYMBOL_SUM, // ∑
      HtmlCharacters.SYMBOL_SUPERSCRIPT_1, // ¹
      HtmlCharacters.SYMBOL_SUPERSCRIPT_2, // ²
      HtmlCharacters.SYMBOL_SUPERSCRIPT_3, // ³
      HtmlCharacters.LETTER_SMALL_SHARP_S, // ß
      HtmlCharacters.LETTER_SMALL_TAU, // τ
      // HtmlCharacters.LETTER_CAPITAL_TAU, // Τ
      HtmlCharacters.LETTER_SMALL_THETA, // θ
      HtmlCharacters.LETTER_CAPITAL_THETA, // Θ
      HtmlCharacters.LETTER_THETA_SYMBOL, // ϑ
      HtmlCharacters.LETTER_SMALL_THORN, // þ
      HtmlCharacters.LETTER_CAPITAL_THORN, // Þ
      HtmlCharacters.SYMBOL_SMALL_TILDE, // ˜
      // HtmlCharacters.SYMBOL_MULTIPLICATION, // ×
      HtmlCharacters.SYMBOL_TRADEMARK, // ™
      HtmlCharacters.LETTER_SMALL_U_ACUTE_ACCENT, // ú
      HtmlCharacters.LETTER_CAPITAL_U_ACUTE_ACCENT, // Ú
      HtmlCharacters.SYMBOL_UP_ARROW, // ↑
      HtmlCharacters.SYMBOL_UP_DOUBLE_ARROW, // ⇑
      HtmlCharacters.LETTER_SMALL_U_CIRCUMFLEX_ACCENT, // û
      HtmlCharacters.LETTER_CAPITAL_U_CIRCUMFLEX_ACCENT, // Û
      HtmlCharacters.LETTER_SMALL_U_GRAVE_ACCENT, // ù
      HtmlCharacters.LETTER_CAPITAL_U_GRAVE_ACCENT, // Ù
      HtmlCharacters.LETTER_UPSILON_SYMBOL, // ϒ
      // HtmlCharacters.LETTER_SMALL_UPSILON, // υ
      // HtmlCharacters.LETTER_CAPITAL_UPSILON, // Υ
      HtmlCharacters.LETTER_SMALL_U_UMLAUT_MARK, // ü
      HtmlCharacters.LETTER_CAPITAL_U_UMLAUT_MARK, // Ü
      HtmlCharacters.LETTER_SMALL_XI, // ξ
      HtmlCharacters.LETTER_CAPITAL_XI, // Ξ
      HtmlCharacters.LETTER_SMALL_Y_ACUTE_ACCENT, // ý
      HtmlCharacters.LETTER_CAPITAL_Y_ACUTE_ACCENT, // Ý
      HtmlCharacters.SYMBOL_YEN, // ¥
      HtmlCharacters.LETTER_SMALL_Y_UMLAUT_MARK, // ÿ
      HtmlCharacters.LETTER_CAPITAL_Y_UMLAUT_MARK, // Ÿ
      HtmlCharacters.LETTER_SMALL_ZETA, // ζ
      // HtmlCharacters.LETTER_CAPITAL_ZETA, // Ζ
    };

    // Copy the table into the list returned by getHtmlCharacters().
    List<HtmlCharacters> registered = new ArrayList<HtmlCharacters>();
    for (HtmlCharacters character : managed) {
      registered.add(character);
    }
    htmlCharacters = registered;
  }

  /**
   * @return List of HTML characters managed by this error.
   */
  @Override
  protected List<HtmlCharacters> getHtmlCharacters() {
    return htmlCharacters;
  }

  /**
   * Analyze a page to check if errors are present.
   * <p>
   * Nothing is reported when the analysis is {@code null}, when a math tag is
   * found for which no surrounding nowiki, source or syntaxhighlight tag is
   * reported, or when the page has at least one "math" template. In every
   * other case the analysis is delegated to the parent class.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed.
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // A math tag that is not wrapped in a nowiki/source/syntaxhighlight tag disables the report
    List<PageElementTag> mathTags = analysis.getTags(PageElementTag.TAG_WIKI_MATH);
    if (mathTags != null) {
      for (PageElementTag mathTag : mathTags) {
        int position = mathTag.getBeginIndex();
        boolean wrapped =
            (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, position) != null) ||
            (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, position) != null) ||
            (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, position) != null);
        if (!wrapped) {
          return false;
        }
      }
    }

    // Any "math" template disables the report as well; otherwise defer to the parent analysis
    List<PageElementTemplate> mathTemplates = analysis.getTemplates("math");
    boolean usesMathTemplate = (mathTemplates != null) && !mathTemplates.isEmpty();
    return !usesMathTemplate && super.analyze(analysis, errors, onlyAutomatic);
  }
}