/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.check.algorithm; import java.util.Collection; import org.wikipediacleaner.api.check.CheckErrorResult; import org.wikipediacleaner.api.data.MagicWord; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.api.data.PageElementExternalLink; import org.wikipediacleaner.api.data.PageElementFunction; import org.wikipediacleaner.api.data.PageElementTag; import org.wikipediacleaner.api.data.PageElementTemplate; import org.wikipediacleaner.api.data.PageElementTemplate.Parameter; import org.wikipediacleaner.api.data.PageElementTitle; /** * Algorithm for analyzing error 105 of check wikipedia project. * Error 105: Headline should start with "=" */ public class CheckErrorAlgorithm105 extends CheckErrorAlgorithmBase { public CheckErrorAlgorithm105() { super("Headline should start with \"=\""); } /** * Analyze a page to check if errors are present. * * @param analysis Page analysis. * @param errors Errors found in the page. * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. * @return Flag indicating if the error was found. */ @Override public boolean analyze( PageAnalysis analysis, Collection<CheckErrorResult> errors, boolean onlyAutomatic) { if (analysis == null) { return false; } // Check every "=" at the end of a line boolean result = false; String contents = analysis.getContents(); int maxLen = contents.length(); int currentIndex = 0; while (currentIndex < maxLen) { int tmpIndex = currentIndex; boolean errorFound = false; while ((tmpIndex < maxLen) && (contents.charAt(tmpIndex) == '=')) { tmpIndex++; } if ((tmpIndex > currentIndex + 1) && // At least 2 "=" (tmpIndex < maxLen) && (contents.charAt(tmpIndex) == '\n')) { errorFound = true; } int nextIndex = Math.max(currentIndex + 1, tmpIndex + 1); // Ignore in comments if (errorFound) { if (analysis.isInComment(currentIndex) != null) { errorFound = false; } } // Ignore if part of an unbalanced title if (errorFound) { PageElementTitle title = analysis.isInTitle(currentIndex); if ((title != null) && (title.getSecondLevel() <= title.getFirstLevel())) { errorFound = false; } } // Ignore in some tags if (errorFound) { if ((analysis.getSurroundingTag(PageElementTag.TAG_HTML_CODE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH_CHEM, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_PRE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SCORE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_TIMELINE, currentIndex) != null)) { errorFound = false; } } // Functions used instead of some tags if (errorFound) { PageElementFunction function = analysis.isInFunction(currentIndex); if ((function != null) && (function.getMagicWord() != null) && (MagicWord.TAG.equals(function.getMagicWord().getName()))) { int functionIndex = function.getBeginIndex(); while ((functionIndex < contents.length()) && (contents.charAt(functionIndex) == '{')) { functionIndex++; } while ((functionIndex < contents.length()) && (Character.isWhitespace(contents.charAt(functionIndex)))) { functionIndex++; } if ((functionIndex < contents.length()) && (contents.startsWith(function.getFunctionName(), functionIndex))) { functionIndex += function.getFunctionName().length(); while ((functionIndex < contents.length()) && (Character.isWhitespace(contents.charAt(functionIndex)))) { functionIndex++; } while ((functionIndex < contents.length()) && (contents.charAt(functionIndex) == ':')) { functionIndex++; } while ((functionIndex < contents.length()) && (Character.isWhitespace(contents.charAt(functionIndex)))) { functionIndex++; } int endIndex = -1; int pipeIndex = contents.indexOf('|', functionIndex); if ((pipeIndex > 0) && ((endIndex < 0) || (pipeIndex < endIndex))) { endIndex = pipeIndex; } int curlyIndex = contents.indexOf('}', functionIndex); if ((curlyIndex > 0) && ((endIndex < 0) || (curlyIndex < endIndex))) { endIndex = curlyIndex; } if (endIndex > 0) { String tagName = contents.substring(functionIndex, endIndex); if (tagName.equalsIgnoreCase(PageElementTag.TAG_WIKI_TIMELINE)) { errorFound = false; } } } } } // Ignore if it's a template parameter "=" between parameter name and value if (errorFound && (tmpIndex == currentIndex + 1)) { PageElementTemplate template = analysis.isInTemplate(currentIndex); if (template != null) { Parameter param = template.getParameterAtIndex(tmpIndex); if (param != null) { int valueIndex = param.getValueStartIndex(); if (valueIndex >= tmpIndex) { errorFound = false; } } } } // Ignore "=" at the end of external links if (errorFound) { PageElementExternalLink link = analysis.isInExternalLink(currentIndex); if ((link != null) && !link.hasSquare()) { errorFound = false; } } // Compute line beginning int beginLine = currentIndex; if (errorFound) { while ((beginLine > 0) && (contents.charAt(beginLine - 1) != '\n')) { beginLine--; } } // Ignore in tables if (errorFound) { if ((beginLine > 0) && (contents.charAt(beginLine) == '|')) { errorFound = false; } } // Signal error if (errorFound) { if (errors == null) { return true; } result = true; // Create error CheckErrorResult errorResult = createCheckErrorResult( analysis, beginLine, tmpIndex); // Suggest possible replacements if (contents.charAt(beginLine) == '=') { int equalsBefore = beginLine; while ((equalsBefore < tmpIndex) && (contents.charAt(equalsBefore) == '=')) { equalsBefore++; } int equalsAfter = tmpIndex; while ((equalsAfter > beginLine) && (contents.charAt(equalsAfter - 1) == '=')) { equalsAfter--; } if (equalsBefore - beginLine != tmpIndex - equalsAfter) { errorResult.addReplacement( contents.substring(beginLine, equalsAfter) + contents.substring(beginLine, equalsBefore)); errorResult.addReplacement( contents.substring(equalsAfter, tmpIndex) + contents.substring(equalsBefore, tmpIndex)); } else { int extraBefore = equalsBefore; while ((extraBefore < tmpIndex) && (contents.charAt(extraBefore) == ' ')) { extraBefore++; } boolean extraBeforeFound = false; while ((extraBefore < tmpIndex) && (contents.charAt(extraBefore) == '=')) { extraBefore++; extraBeforeFound = true; } if (!extraBeforeFound) { extraBefore = equalsBefore; } int extraAfter = equalsAfter; while ((extraAfter > beginLine) && (contents.charAt(extraAfter - 1) == ' ')) { extraAfter--; } boolean extraAfterFound = false; while ((extraAfter > beginLine) && (contents.charAt(extraAfter - 1) == '=')) { extraAfter--; extraAfterFound = true; } if (!extraAfterFound) { extraAfter = equalsAfter; } if (extraBeforeFound || extraAfterFound && (extraAfter > extraBefore)) { errorResult.addReplacement( contents.substring(beginLine, equalsBefore) + contents.substring(extraBefore, extraAfter) + contents.substring(equalsAfter, tmpIndex)); } } } errors.add(errorResult); } currentIndex = nextIndex; } return result; } }