/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.check.algorithm; import java.util.Collection; import org.wikipediacleaner.api.check.CheckErrorResult; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.api.data.PageElementCategory; import org.wikipediacleaner.api.data.PageElementExternalLink; import org.wikipediacleaner.api.data.PageElementImage; import org.wikipediacleaner.api.data.PageElementInternalLink; import org.wikipediacleaner.api.data.PageElementInterwikiLink; import org.wikipediacleaner.api.data.PageElementLanguageLink; import org.wikipediacleaner.api.data.PageElementTag; import org.wikipediacleaner.api.data.PageElementTemplate; import org.wikipediacleaner.i18n.GT; /** * Algorithm for analyzing error 10 of check wikipedia project. * Error 10: Square brackets not correct end */ public class CheckErrorAlgorithm010 extends CheckErrorAlgorithmBase { public CheckErrorAlgorithm010() { super("Square brackets not correct end"); } private final static String REJECTED_CHARS = "\n[{"; /** * Analyze a page to check if errors are present. * * @param analysis Page analysis. * @param errors Errors found in the page. * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. * @return Flag indicating if the error was found. */ @Override public boolean analyze( PageAnalysis analysis, Collection<CheckErrorResult> errors, boolean onlyAutomatic) { if (analysis == null) { return false; } // Analyze contents by looking for [[ String contents = analysis.getContents(); int maxLength = contents.length(); int currentIndex = contents.indexOf("[["); boolean result = false; while (currentIndex >= 0) { boolean shouldCount = true; if (shouldCount) { PageElementInternalLink link = analysis.isInInternalLink(currentIndex); if ((link != null) && (link.getBeginIndex() == currentIndex)) { shouldCount = false; } } if (shouldCount) { PageElementImage image = analysis.isInImage(currentIndex); if ((image != null) && (image.getBeginIndex() == currentIndex)) { shouldCount = false; } } if (shouldCount) { PageElementCategory category = analysis.isInCategory(currentIndex); if ((category != null) && (category.getBeginIndex() == currentIndex)) { shouldCount = false; } } if (shouldCount) { PageElementLanguageLink link = analysis.isInLanguageLink(currentIndex); if ((link != null) && (link.getBeginIndex() == currentIndex)) { shouldCount = false; } } if (shouldCount) { PageElementInterwikiLink link = analysis.isInInterwikiLink(currentIndex); if ((link != null) && (link.getBeginIndex() == currentIndex)) { shouldCount = false; } } if (shouldCount) { PageElementExternalLink link = analysis.isInExternalLink(currentIndex + 1); if ((link != null) && (link.getBeginIndex() == currentIndex + 1)) { shouldCount = false; } } if (shouldCount) { if ((analysis.isInComment(currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH_CHEM, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SCORE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, currentIndex) != null) || (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, currentIndex) != null) || (analysis.isInTag(currentIndex) != null)) { shouldCount = false; } } if (shouldCount) { PageElementTemplate template = analysis.isInTemplate(currentIndex + 2); if ((template != null) && (contents.startsWith("]]", template.getEndIndex()))) { shouldCount = false; } } if (shouldCount) { if (errors == null) { return true; } result = true; // Check if there is a potential end int tmpIndex = currentIndex + 2; boolean errorReported = false; boolean finished = false; while (!finished && (tmpIndex < maxLength)) { char tmpChar = contents.charAt(tmpIndex); if (REJECTED_CHARS.indexOf(tmpChar) >= 0) { finished = true; } else if (tmpChar == ']') { int tmpIndex2 = tmpIndex + 1; while ((tmpIndex2 < maxLength) && (contents.charAt(tmpIndex2) != ']') && (REJECTED_CHARS.indexOf(contents.charAt(tmpIndex2)) < 0)) { tmpIndex2++; } String suffix = ""; if ((tmpIndex2 < maxLength) && (contents.charAt(tmpIndex2) == ']')) { suffix = contents.substring(tmpIndex + 1, tmpIndex2 + 1); } else { tmpIndex2 = tmpIndex; } CheckErrorResult errorResult = createCheckErrorResult( analysis, currentIndex, tmpIndex2 + 1); // Check if the situation is something like [[http://....] (replacement: [http://....]) boolean protocolFound = PageElementExternalLink.isPossibleProtocol(contents, currentIndex + 2); if (protocolFound) { errorResult.addReplacement(contents.substring(currentIndex + 1, tmpIndex2 + 1)); } errorResult.addReplacement(contents.substring(currentIndex, tmpIndex + 1) + "]" + suffix); if (suffix.length() > 0) { errorResult.addReplacement(contents.substring(currentIndex, tmpIndex) + suffix + "]"); } errors.add(errorResult); errorReported = true; finished = true; } else if (tmpChar == '}') { int lastChar = tmpIndex; if ((lastChar + 1 < maxLength) && (contents.charAt(lastChar + 1) == '}')) { lastChar++; } CheckErrorResult errorResult = createCheckErrorResult( analysis, currentIndex, lastChar + 1); errorResult.addReplacement(contents.substring(currentIndex, tmpIndex) + "]]"); errorResult.addReplacement("{{" + contents.substring(currentIndex + 2, tmpIndex) + "}}"); errors.add(errorResult); errorReported = true; finished = true; } tmpIndex++; } // Default if (!errorReported) { CheckErrorResult errorResult = createCheckErrorResult( analysis, currentIndex, currentIndex + 2); errorResult.addReplacement("", GT._("Delete")); errors.add(errorResult); } } currentIndex = contents.indexOf("[[", currentIndex + 2); } // Analyze each internal link to see if it contains a [ for (PageElementInternalLink link : analysis.getInternalLinks()) { String text = link.getText(); if (text != null) { text = cleanText(text); if (text != null) { if (errors == null) { return true; } result = true; CheckErrorResult errorResult = createCheckErrorResult( analysis, link.getBeginIndex(), link.getEndIndex()); errorResult.addReplacement(PageElementInternalLink.createInternalLink( link.getLink(), link.getAnchor(), text)); errors.add(errorResult); } } } // Analyze each image to see if it contains a [ for (PageElementImage image : analysis.getImages()) { String text = image.getDescription(); String modifiedText = cleanText(text); String alt = image.getAlternateDescription(); String modifiedAlt = cleanText(alt); if ((modifiedText != null) || (modifiedAlt != null)) { if (errors == null) { return true; } result = true; CheckErrorResult errorResult = createCheckErrorResult( analysis, image.getBeginIndex(), image.getEndIndex()); errorResult.addReplacement(image.getDescriptionReplacement( (modifiedText != null) ? modifiedText : text, (modifiedAlt != null) ? modifiedAlt : alt)); errors.add(errorResult); } } // Analyze each external link to see if it has a [ before for (PageElementExternalLink link : analysis.getExternalLinks()) { int begin = link.getBeginIndex(); if (link.hasSquare()) { if ((begin > 0) && (contents.charAt(begin - 1) == '[')) { int end = link.getEndIndex(); if ((end >= contents.length()) || (contents.charAt(end) != ']')) { if (errors == null) { return true; } result = true; CheckErrorResult errorResult = createCheckErrorResult( analysis, begin - 1, begin); errorResult.addReplacement("["); errors.add(errorResult); } } } } return result; } /** * @param originalText Original text. * @return Cleaned up text (or null if no cleanup required). */ private String cleanText(String originalText) { if (originalText == null) { return null; } StringBuilder sb = null; int index = 0; int singleBracketsCount = 0; while (index < originalText.length()) { boolean doubleBrackets = originalText.startsWith("[[", index); if (doubleBrackets || !originalText.startsWith("[", index)) { boolean ok = true; if (originalText.startsWith("]", index)) { doubleBrackets = originalText.startsWith("]]", index); if (!doubleBrackets) { if (singleBracketsCount > 0) { singleBracketsCount--; } else { ok = false; if (sb == null) { sb = new StringBuilder(originalText.substring(0, index)); } } } } int count = doubleBrackets ? 2 : 1; if (ok && (sb != null)) { sb.append(originalText.substring(index, index + count)); } index += count; } else { singleBracketsCount++; boolean paired = false; int index2 = index + 1; while (!paired && (index2 < originalText.length())) { if (originalText.startsWith("]", index2) && !originalText.startsWith("]]", index2)) { paired = true; } index2++; } if (!paired) { if (sb == null) { sb = new StringBuilder(originalText.substring(0, index)); } } else if (sb != null) { sb.append('['); } index++; } } return (sb != null) ? sb.toString() : null; } }